55 lines
1.5 KiB
Java
55 lines
1.5 KiB
Java
package eu.dnetlib.data.mdstore.manager.utils;
|
|
|
|
import java.io.IOException;
|
|
import java.util.LinkedHashMap;
|
|
import java.util.LinkedHashSet;
|
|
import java.util.Map;
|
|
import java.util.Set;
|
|
|
|
import org.apache.avro.generic.GenericRecord;
|
|
import org.apache.hadoop.conf.Configuration;
|
|
import org.apache.hadoop.fs.Path;
|
|
import org.junit.jupiter.api.BeforeEach;
|
|
import org.junit.jupiter.api.Disabled;
|
|
import org.junit.jupiter.api.Test;
|
|
|
|
import parquet.avro.AvroParquetReader;
|
|
import parquet.hadoop.ParquetReader;
|
|
|
|
@Disabled
|
|
class HdfsClientTest {
|
|
|
|
private static final String PARQUET_FILE = "file:///Users/michele/Desktop/part-00000-e3675dc3-69fb-422e-a159-78e34cfe14d2-c000.snappy.parquet";
|
|
|
|
@BeforeEach
|
|
void setUp() throws Exception {}
|
|
|
|
@SuppressWarnings("unchecked")
|
|
@Test
|
|
void testParquet() throws IllegalArgumentException, IOException {
|
|
|
|
System.out.println("Opening parquet file: " + PARQUET_FILE);
|
|
|
|
try (final ParquetReader<GenericRecord> reader =
|
|
AvroParquetReader.<GenericRecord> builder(new Path(PARQUET_FILE)).withConf(new Configuration()).build()) {
|
|
System.out.println("File OPENED");
|
|
|
|
GenericRecord rec = null;
|
|
final Set<String> fields = new LinkedHashSet<>();
|
|
while ((rec = reader.read()) != null) {
|
|
if (fields.isEmpty()) {
|
|
rec.getSchema().getFields().forEach(f -> fields.add(f.name()));
|
|
}
|
|
|
|
final Map<String, Object> map = new LinkedHashMap<>();
|
|
for (final String f : fields) {
|
|
map.put(f, rec.get(f));
|
|
}
|
|
|
|
System.out.println(map);
|
|
}
|
|
}
|
|
}
|
|
|
|
}
|