forked from D-Net/dnet-hadoop
GetCSV refactoring - added test class (all the tests are disabled since they refer to remote resources)
This commit is contained in:
parent
bfe8f5335c
commit
733bcaecf6
|
@ -0,0 +1,245 @@
|
|||
|
||||
package eu.dnetlib.dhp.common.collection;
|
||||
|
||||
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;

import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.common.collection.models.CSVProgramme;
import eu.dnetlib.dhp.common.collection.models.CSVProject;
import eu.dnetlib.dhp.common.collection.models.DOAJModel;
import eu.dnetlib.dhp.common.collection.models.UnibiGoldModel;
|
||||
|
||||
public class GetCSVTest {
|
||||
|
||||
private static String workingDir;
|
||||
|
||||
private static LocalFileSystem fs;
|
||||
|
||||
@BeforeAll
|
||||
public static void beforeAll() throws IOException {
|
||||
workingDir = Files
|
||||
.createTempDirectory(GetCSVTest.class.getSimpleName())
|
||||
.toString();
|
||||
|
||||
fs = FileSystem.getLocal(new Configuration());
|
||||
}
|
||||
|
||||
@Disabled
|
||||
@Test
|
||||
void getProgrammeFileTest() throws Exception {
|
||||
|
||||
String fileURL = "https://cordis.europa.eu/data/reference/cordisref-h2020programmes.csv";
|
||||
|
||||
GetCSV
|
||||
.getCsv(
|
||||
fs, new BufferedReader(
|
||||
new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))),
|
||||
workingDir + "/programme",
|
||||
"eu.dnetlib.dhp.common.collection.models.CSVProgramme", ';');
|
||||
|
||||
BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme"))));
|
||||
|
||||
String line;
|
||||
int count = 0;
|
||||
while ((line = in.readLine()) != null) {
|
||||
CSVProgramme csvp = new ObjectMapper().readValue(line, CSVProgramme.class);
|
||||
if (count == 0) {
|
||||
Assertions.assertTrue(csvp.getCode().equals("H2020-EU.5.f."));
|
||||
Assertions
|
||||
.assertTrue(
|
||||
csvp
|
||||
.getTitle()
|
||||
.startsWith(
|
||||
"Develop the governance for the advancement of responsible research and innovation by all stakeholders"));
|
||||
Assertions
|
||||
.assertTrue(csvp.getTitle().endsWith("promote an ethics framework for research and innovation"));
|
||||
Assertions.assertTrue(csvp.getShortTitle().equals(""));
|
||||
Assertions.assertTrue(csvp.getLanguage().equals("en"));
|
||||
}
|
||||
if (count == 28) {
|
||||
Assertions.assertTrue(csvp.getCode().equals("H2020-EU.3.5.4."));
|
||||
Assertions
|
||||
.assertTrue(
|
||||
csvp
|
||||
.getTitle()
|
||||
.equals(
|
||||
"Grundlagen für den Übergang zu einer umweltfreundlichen Wirtschaft und Gesellschaft durch Öko-Innovation"));
|
||||
Assertions
|
||||
.assertTrue(csvp.getShortTitle().equals("A green economy and society through eco-innovation"));
|
||||
Assertions.assertTrue(csvp.getLanguage().equals("de"));
|
||||
}
|
||||
if (count == 229) {
|
||||
Assertions.assertTrue(csvp.getCode().equals("H2020-EU.3.2."));
|
||||
Assertions
|
||||
.assertTrue(
|
||||
csvp
|
||||
.getTitle()
|
||||
.equals(
|
||||
"SOCIETAL CHALLENGES - Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy"));
|
||||
Assertions
|
||||
.assertTrue(
|
||||
csvp.getShortTitle().equals("Food, agriculture, forestry, marine research and bioeconomy"));
|
||||
Assertions.assertTrue(csvp.getLanguage().equals("en"));
|
||||
}
|
||||
Assertions.assertTrue(csvp.getCode() != null);
|
||||
Assertions.assertTrue(csvp.getCode().startsWith("H2020"));
|
||||
count += 1;
|
||||
}
|
||||
|
||||
Assertions.assertEquals(767, count);
|
||||
}
|
||||
|
||||
@Disabled
|
||||
@Test
|
||||
void getProjectFileTest() throws IOException, CollectorException, ClassNotFoundException {
|
||||
String fileURL = "https://cordis.europa.eu/data/cordis-h2020projects.csv";
|
||||
// String fileURL = "/Users/miriam.baglioni/Downloads/cordis-h2020projects.csv";
|
||||
|
||||
GetCSV
|
||||
.getCsv(
|
||||
fs,
|
||||
new BufferedReader(new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL)))
|
||||
// new BufferedReader(new FileReader(fileURL))
|
||||
, workingDir + "/projects",
|
||||
"eu.dnetlib.dhp.common.collection.models.CSVProject", ';');
|
||||
|
||||
BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/projects"))));
|
||||
|
||||
String line;
|
||||
int count = 0;
|
||||
while ((line = in.readLine()) != null) {
|
||||
CSVProject csvp = new ObjectMapper().readValue(line, CSVProject.class);
|
||||
if (count == 0) {
|
||||
Assertions.assertTrue(csvp.getId().equals("771736"));
|
||||
Assertions.assertTrue(csvp.getProgramme().equals("H2020-EU.1.1."));
|
||||
Assertions.assertTrue(csvp.getTopics().equals("ERC-2017-COG"));
|
||||
|
||||
}
|
||||
if (count == 22882) {
|
||||
Assertions.assertTrue(csvp.getId().equals("752903"));
|
||||
Assertions.assertTrue(csvp.getProgramme().equals("H2020-EU.1.3.2."));
|
||||
Assertions.assertTrue(csvp.getTopics().equals("MSCA-IF-2016"));
|
||||
}
|
||||
if (count == 223023) {
|
||||
Assertions.assertTrue(csvp.getId().equals("861952"));
|
||||
Assertions.assertTrue(csvp.getProgramme().equals("H2020-EU.4.e."));
|
||||
Assertions.assertTrue(csvp.getTopics().equals("SGA-SEWP-COST-2019"));
|
||||
}
|
||||
Assertions.assertTrue(csvp.getId() != null);
|
||||
Assertions.assertTrue(csvp.getProgramme().startsWith("H2020"));
|
||||
count += 1;
|
||||
}
|
||||
|
||||
Assertions.assertEquals(34957, count);
|
||||
}
|
||||
|
||||
@Disabled
|
||||
@Test
|
||||
void getUnibiFileTest() throws CollectorException, IOException, ClassNotFoundException {
|
||||
|
||||
String fileURL = "https://pub.uni-bielefeld.de/download/2944717/2944718/issn_gold_oa_version_4.csv";
|
||||
|
||||
GetCSV
|
||||
.getCsv(
|
||||
fs, new BufferedReader(
|
||||
new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))),
|
||||
workingDir + "/programme",
|
||||
"eu.dnetlib.dhp.common.collection.models.UnibiGoldModel", ',');
|
||||
|
||||
BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme"))));
|
||||
|
||||
String line;
|
||||
int count = 0;
|
||||
while ((line = in.readLine()) != null) {
|
||||
UnibiGoldModel unibi = new ObjectMapper().readValue(line, UnibiGoldModel.class);
|
||||
if (count == 0) {
|
||||
Assertions.assertTrue(unibi.getIssn().equals("0001-625X"));
|
||||
Assertions.assertTrue(unibi.getIssn_l().equals("0001-625X"));
|
||||
Assertions.assertTrue(unibi.getTitle().equals("Acta Mycologica"));
|
||||
|
||||
}
|
||||
if (count == 43158) {
|
||||
Assertions.assertTrue(unibi.getIssn().equals("2088-6330"));
|
||||
Assertions.assertTrue(unibi.getIssn_l().equals("2088-6330"));
|
||||
Assertions.assertTrue(unibi.getTitle().equals("Religió: Jurnal Studi Agama-agama"));
|
||||
|
||||
}
|
||||
if (count == 67027) {
|
||||
Assertions.assertTrue(unibi.getIssn().equals("2658-7068"));
|
||||
Assertions.assertTrue(unibi.getIssn_l().equals("2308-2488"));
|
||||
Assertions.assertTrue(unibi.getTitle().equals("Istoriko-èkonomičeskie issledovaniâ."));
|
||||
}
|
||||
|
||||
count += 1;
|
||||
}
|
||||
|
||||
Assertions.assertEquals(67028, count);
|
||||
}
|
||||
|
||||
@Disabled
|
||||
@Test
|
||||
void getDoajFileTest() throws CollectorException, IOException, ClassNotFoundException {
|
||||
|
||||
String fileURL = "https://doaj.org/csv";
|
||||
|
||||
try (BufferedReader in = new BufferedReader(
|
||||
new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL)))) {
|
||||
try (PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("/tmp/DOAJ_1.csv")))) {
|
||||
String line;
|
||||
while ((line = in.readLine()) != null) {
|
||||
writer.println(line.replace("\\\"", "\""));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
GetCSV
|
||||
.getCsv(
|
||||
fs, new BufferedReader(
|
||||
new FileReader("/tmp/DOAJ_1.csv")),
|
||||
workingDir + "/programme",
|
||||
"eu.dnetlib.dhp.common.collection.models.DOAJModel", ',');
|
||||
|
||||
BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme"))));
|
||||
|
||||
String line;
|
||||
int count = 0;
|
||||
while ((line = in.readLine()) != null) {
|
||||
DOAJModel doaj = new ObjectMapper().readValue(line, DOAJModel.class);
|
||||
if (count == 0) {
|
||||
Assertions.assertTrue(doaj.getIssn().equals("0001-3765"));
|
||||
Assertions.assertTrue(doaj.getEissn().equals("1678-2690"));
|
||||
Assertions.assertTrue(doaj.getJournalTitle().equals("Anais da Academia Brasileira de Ciências"));
|
||||
|
||||
}
|
||||
if (count == 7902) {
|
||||
|
||||
Assertions.assertTrue(doaj.getIssn().equals(""));
|
||||
Assertions.assertTrue(doaj.getEissn().equals("2055-7159"));
|
||||
Assertions.assertTrue(doaj.getJournalTitle().equals("BJR|case reports"));
|
||||
}
|
||||
if (count == 16703) {
|
||||
|
||||
Assertions.assertTrue(doaj.getIssn().equals(""));
|
||||
Assertions.assertTrue(doaj.getEissn().equals("2788-6298"));
|
||||
Assertions
|
||||
.assertTrue(doaj.getJournalTitle().equals("Teacher Education through Flexible Learning in Africa"));
|
||||
}
|
||||
|
||||
count += 1;
|
||||
}
|
||||
|
||||
Assertions.assertEquals(16709, count);
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue