2022-06-21 23:07:06 +02:00
|
|
|
|
2022-04-07 14:06:38 +02:00
|
|
|
package eu.dnetlib.dhp.collection.plugin.file;
|
|
|
|
|
2022-06-21 23:07:06 +02:00
|
|
|
import java.io.IOException;
|
|
|
|
import java.util.HashMap;
|
|
|
|
import java.util.stream.Stream;
|
|
|
|
|
2022-04-28 15:31:11 +02:00
|
|
|
import org.apache.hadoop.conf.Configuration;
|
|
|
|
import org.apache.hadoop.fs.FileSystem;
|
|
|
|
import org.apache.hadoop.fs.LocalFileSystem;
|
2022-04-07 14:06:38 +02:00
|
|
|
import org.junit.jupiter.api.Assertions;
|
|
|
|
import org.junit.jupiter.api.BeforeEach;
|
|
|
|
import org.junit.jupiter.api.Test;
|
|
|
|
import org.slf4j.Logger;
|
|
|
|
import org.slf4j.LoggerFactory;
|
|
|
|
|
2022-06-21 23:07:06 +02:00
|
|
|
import eu.dnetlib.dhp.collection.ApiDescriptor;
|
|
|
|
import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
|
|
|
|
import eu.dnetlib.dhp.common.collection.CollectorException;
|
|
|
|
import net.bytebuddy.asm.Advice;
|
2022-04-07 14:06:38 +02:00
|
|
|
|
|
|
|
public class FileCollectorPluginTest {
|
|
|
|
|
2022-06-21 23:07:06 +02:00
|
|
|
private static final Logger log = LoggerFactory.getLogger(FileGZipCollectorPluginTest.class);
|
2022-04-07 14:06:38 +02:00
|
|
|
|
2022-06-21 23:07:06 +02:00
|
|
|
private final ApiDescriptor api = new ApiDescriptor();
|
2022-04-28 15:31:11 +02:00
|
|
|
|
2022-06-21 23:07:06 +02:00
|
|
|
private FileCollectorPlugin plugin;
|
2022-04-07 14:06:38 +02:00
|
|
|
|
2022-06-21 23:07:06 +02:00
|
|
|
private static final String SPLIT_ON_ELEMENT = "repository";
|
2022-04-07 14:06:38 +02:00
|
|
|
|
2022-06-21 23:07:06 +02:00
|
|
|
@BeforeEach
|
|
|
|
public void setUp() throws IOException {
|
2022-04-07 14:06:38 +02:00
|
|
|
|
2022-06-21 23:07:06 +02:00
|
|
|
final String gzipFile = this
|
|
|
|
.getClass()
|
|
|
|
.getResource("/eu/dnetlib/dhp/collection/plugin/file/opendoar.xml")
|
|
|
|
.getFile();
|
2022-04-07 14:06:38 +02:00
|
|
|
|
2022-06-21 23:07:06 +02:00
|
|
|
api.setBaseUrl(gzipFile);
|
2022-04-07 14:06:38 +02:00
|
|
|
|
2022-06-21 23:07:06 +02:00
|
|
|
HashMap<String, String> params = new HashMap<>();
|
|
|
|
params.put("splitOnElement", SPLIT_ON_ELEMENT);
|
2022-04-07 14:06:38 +02:00
|
|
|
|
2022-06-21 23:07:06 +02:00
|
|
|
api.setParams(params);
|
2022-04-07 14:06:38 +02:00
|
|
|
|
2022-06-21 23:07:06 +02:00
|
|
|
FileSystem fs = FileSystem.get(new Configuration());
|
|
|
|
plugin = new FileCollectorPlugin(fs);
|
|
|
|
}
|
2022-04-07 14:06:38 +02:00
|
|
|
|
2022-06-21 23:07:06 +02:00
|
|
|
@Test
|
|
|
|
void test() throws CollectorException {
|
2022-04-07 14:06:38 +02:00
|
|
|
|
2022-06-21 23:07:06 +02:00
|
|
|
final Stream<String> stream = plugin.collect(api, new AggregatorReport());
|
2022-04-07 14:06:38 +02:00
|
|
|
|
2022-06-21 23:07:06 +02:00
|
|
|
stream.limit(10).forEach(s -> {
|
|
|
|
Assertions.assertTrue(s.length() > 0);
|
|
|
|
log.info(s);
|
|
|
|
});
|
|
|
|
}
|
2022-04-07 14:06:38 +02:00
|
|
|
}
|