dnet-hadoop/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/file/FileGZipCollectorPluginTest...

69 lines
1.8 KiB
Java
Raw Normal View History

2022-06-21 23:07:06 +02:00
2022-04-07 13:31:22 +02:00
package eu.dnetlib.dhp.collection.plugin.file;
2022-06-21 23:07:06 +02:00
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.util.HashMap;
import java.util.Objects;
import java.util.stream.Stream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.junit.jupiter.api.*;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Mockito;
import org.mockito.junit.jupiter.MockitoExtension;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
2022-06-21 23:07:06 +02:00
import eu.dnetlib.dhp.collection.ApiDescriptor;
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
import eu.dnetlib.dhp.common.collection.CollectorException;
@TestMethodOrder(MethodOrderer.OrderAnnotation.class)
@ExtendWith(MockitoExtension.class)
public class FileGZipCollectorPluginTest {
2022-06-21 23:07:06 +02:00
private static final Logger log = LoggerFactory.getLogger(FileGZipCollectorPluginTest.class);
2022-06-21 23:07:06 +02:00
private final ApiDescriptor api = new ApiDescriptor();
2022-06-21 23:07:06 +02:00
private FileGZipCollectorPlugin plugin;
2022-06-21 23:07:06 +02:00
private static final String SPLIT_ON_ELEMENT = "repository";
2022-06-21 23:07:06 +02:00
@BeforeEach
public void setUp() throws IOException {
2022-06-21 23:07:06 +02:00
final String gzipFile = Objects
.requireNonNull(
this
.getClass()
.getResource("/eu/dnetlib/dhp/collection/plugin/file/opendoar.xml.gz"))
.getFile();
2022-06-21 23:07:06 +02:00
api.setBaseUrl(gzipFile);
2022-06-21 23:07:06 +02:00
HashMap<String, String> params = new HashMap<>();
params.put("splitOnElement", SPLIT_ON_ELEMENT);
2022-06-21 23:07:06 +02:00
api.setParams(params);
2022-06-21 23:07:06 +02:00
FileSystem fs = FileSystem.get(new Configuration());
plugin = new FileGZipCollectorPlugin(fs);
}
2022-06-21 23:07:06 +02:00
@Test
void test() throws CollectorException {
2022-06-21 23:07:06 +02:00
final Stream<String> stream = plugin.collect(api, new AggregatorReport());
2022-06-21 23:07:06 +02:00
stream.limit(10).forEach(s -> {
Assertions.assertTrue(s.length() > 0);
log.info(s);
});
}
}