Test classes moved into their specific components

This commit is contained in:
Claudio Atzori 2021-08-13 12:14:47 +02:00
parent 3359f73fcf
commit baed5e3337
7 changed files with 284 additions and 469 deletions

View File

@ -1,246 +0,0 @@
package eu.dnetlib.dhp.common.collection;
import java.io.*;
import java.nio.file.Files;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.common.collection.models.CSVProgramme;
import eu.dnetlib.dhp.common.collection.models.CSVProject;
import eu.dnetlib.dhp.common.collection.models.DOAJModel;
import eu.dnetlib.dhp.common.collection.models.UnibiGoldModel;
import jdk.nashorn.internal.ir.annotations.Ignore;
/**
 * Integration tests for {@link GetCSV}.
 *
 * <p>Each test feeds a CSV source to {@code GetCSV.getCsv}, then reads the produced file line by line
 * from the local Hadoop file system, parsing every line as JSON into the expected model class and
 * checking a sample of records plus the total record count.
 *
 * <p>All tests are {@code @Disabled}: they need network access and assert on the content of live,
 * changing datasets.
 */
public class GetCSVTest {

    /** Shared JSON mapper, reused instead of building a new instance for every parsed line. */
    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    /** Temporary directory that receives every file written by the tests. */
    private static String workingDir;

    /** Local Hadoop file system used both by {@code GetCSV} and to read its output back. */
    private static LocalFileSystem fs;

    /**
     * Creates the temporary working directory and the local file system shared by all tests.
     *
     * @throws IOException if the temporary directory or the file system cannot be created
     */
    @BeforeAll
    public static void beforeAll() throws IOException {
        workingDir = Files
            .createTempDirectory(GetCSVTest.class.getSimpleName())
            .toString();
        fs = FileSystem.getLocal(new Configuration());
    }

    /**
     * Downloads the H2020 programmes CSV and checks three sample records and the total count.
     *
     * @throws Exception on any download, conversion or parsing failure
     */
    @Disabled
    @Test
    void getProgrammeFileTest() throws Exception {
        String fileURL = "https://cordis.europa.eu/data/reference/cordisref-h2020programmes.csv";
        String outputPath = workingDir + "/programme";

        try (BufferedReader source = new BufferedReader(
            new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL)))) {
            GetCSV
                .getCsv(
                    fs, source, outputPath,
                    "eu.dnetlib.dhp.common.collection.models.CSVProgramme", ';');
        }

        int count = 0;
        try (BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(outputPath))))) {
            String line;
            while ((line = in.readLine()) != null) {
                CSVProgramme csvp = OBJECT_MAPPER.readValue(line, CSVProgramme.class);
                if (count == 0) {
                    Assertions.assertEquals("H2020-EU.5.f.", csvp.getCode());
                    Assertions
                        .assertTrue(
                            csvp
                                .getTitle()
                                .startsWith(
                                    "Develop the governance for the advancement of responsible research and innovation by all stakeholders"));
                    Assertions
                        .assertTrue(
                            csvp.getTitle().endsWith("promote an ethics framework for research and innovation"));
                    Assertions.assertEquals("", csvp.getShortTitle());
                    Assertions.assertEquals("en", csvp.getLanguage());
                }
                if (count == 28) {
                    Assertions.assertEquals("H2020-EU.3.5.4.", csvp.getCode());
                    Assertions
                        .assertEquals(
                            "Grundlagen für den Übergang zu einer umweltfreundlichen Wirtschaft und Gesellschaft durch Öko-Innovation",
                            csvp.getTitle());
                    Assertions
                        .assertEquals("A green economy and society through eco-innovation", csvp.getShortTitle());
                    Assertions.assertEquals("de", csvp.getLanguage());
                }
                if (count == 229) {
                    Assertions.assertEquals("H2020-EU.3.2.", csvp.getCode());
                    Assertions
                        .assertEquals(
                            "SOCIETAL CHALLENGES - Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy",
                            csvp.getTitle());
                    Assertions
                        .assertEquals(
                            "Food, agriculture, forestry, marine research and bioeconomy", csvp.getShortTitle());
                    Assertions.assertEquals("en", csvp.getLanguage());
                }
                // Every record, not only the sampled ones, must carry an H2020 code.
                Assertions.assertNotNull(csvp.getCode());
                Assertions.assertTrue(csvp.getCode().startsWith("H2020"));
                count += 1;
            }
        }

        Assertions.assertEquals(767, count);
    }

    /**
     * Downloads the H2020 projects CSV and checks sample records and the total count.
     *
     * @throws IOException            on download or file system failures
     * @throws CollectorException     if the remote source cannot be collected
     * @throws ClassNotFoundException if the model class name cannot be resolved
     */
    @Disabled
    @Test
    void getProjectFileTest() throws IOException, CollectorException, ClassNotFoundException {
        String fileURL = "https://cordis.europa.eu/data/cordis-h2020projects.csv";
        String outputPath = workingDir + "/projects";

        try (BufferedReader source = new BufferedReader(
            new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL)))) {
            GetCSV
                .getCsv(
                    fs, source, outputPath,
                    "eu.dnetlib.dhp.common.collection.models.CSVProject", ';');
        }

        int count = 0;
        try (BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(outputPath))))) {
            String line;
            while ((line = in.readLine()) != null) {
                CSVProject csvp = OBJECT_MAPPER.readValue(line, CSVProject.class);
                if (count == 0) {
                    Assertions.assertEquals("771736", csvp.getId());
                    Assertions.assertEquals("H2020-EU.1.1.", csvp.getProgramme());
                    Assertions.assertEquals("ERC-2017-COG", csvp.getTopics());
                }
                if (count == 22882) {
                    Assertions.assertEquals("752903", csvp.getId());
                    Assertions.assertEquals("H2020-EU.1.3.2.", csvp.getProgramme());
                    Assertions.assertEquals("MSCA-IF-2016", csvp.getTopics());
                }
                // NOTE(review): index 223023 is larger than the expected total of 34957 records asserted
                // below, so this branch never executes - confirm the intended row index.
                if (count == 223023) {
                    Assertions.assertEquals("861952", csvp.getId());
                    Assertions.assertEquals("H2020-EU.4.e.", csvp.getProgramme());
                    Assertions.assertEquals("SGA-SEWP-COST-2019", csvp.getTopics());
                }
                Assertions.assertNotNull(csvp.getId());
                Assertions.assertTrue(csvp.getProgramme().startsWith("H2020"));
                count += 1;
            }
        }

        Assertions.assertEquals(34957, count);
    }

    /**
     * Downloads the Bielefeld gold open access ISSN list and checks sample records and the total count.
     *
     * @throws CollectorException     if the remote source cannot be collected
     * @throws IOException            on download or file system failures
     * @throws ClassNotFoundException if the model class name cannot be resolved
     */
    @Disabled
    @Test
    void getUnibiFileTest() throws CollectorException, IOException, ClassNotFoundException {
        String fileURL = "https://pub.uni-bielefeld.de/download/2944717/2944718/issn_gold_oa_version_4.csv";
        // Dedicated output file, so this test does not share a path with the programme test.
        String outputPath = workingDir + "/unibi";

        try (BufferedReader source = new BufferedReader(
            new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL)))) {
            GetCSV
                .getCsv(
                    fs, source, outputPath,
                    "eu.dnetlib.dhp.common.collection.models.UnibiGoldModel", ',');
        }

        int count = 0;
        try (BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(outputPath))))) {
            String line;
            while ((line = in.readLine()) != null) {
                UnibiGoldModel unibi = OBJECT_MAPPER.readValue(line, UnibiGoldModel.class);
                if (count == 0) {
                    Assertions.assertEquals("0001-625X", unibi.getIssn());
                    Assertions.assertEquals("0001-625X", unibi.getIssn_l());
                    Assertions.assertEquals("Acta Mycologica", unibi.getTitle());
                }
                if (count == 43158) {
                    Assertions.assertEquals("2088-6330", unibi.getIssn());
                    Assertions.assertEquals("2088-6330", unibi.getIssn_l());
                    Assertions.assertEquals("Religió: Jurnal Studi Agama-agama", unibi.getTitle());
                }
                if (count == 67027) {
                    Assertions.assertEquals("2658-7068", unibi.getIssn());
                    Assertions.assertEquals("2308-2488", unibi.getIssn_l());
                    Assertions.assertEquals("Istoriko-èkonomičeskie issledovaniâ.", unibi.getTitle());
                }
                count += 1;
            }
        }

        Assertions.assertEquals(67028, count);
    }

    /**
     * Downloads the DOAJ journal list, rewrites backslash-escaped quotes as plain quotes into a local
     * copy, converts that copy and checks sample records and the total count.
     *
     * @throws CollectorException     if the remote source cannot be collected
     * @throws IOException            on download or file system failures
     * @throws ClassNotFoundException if the model class name cannot be resolved
     */
    @Disabled
    @Test
    void getDoajFileTest() throws CollectorException, IOException, ClassNotFoundException {
        String fileURL = "https://doaj.org/csv";
        // The intermediate copy lives in the test working directory rather than a fixed /tmp path.
        String cleanedCsv = workingDir + "/DOAJ_1.csv";
        // Dedicated output file, so this test does not share a path with the programme test.
        String outputPath = workingDir + "/doaj";

        try (BufferedReader in = new BufferedReader(
            new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL)));
            PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter(cleanedCsv)))) {
            String line;
            while ((line = in.readLine()) != null) {
                writer.println(line.replace("\\\"", "\""));
            }
        }

        try (BufferedReader source = new BufferedReader(new FileReader(cleanedCsv))) {
            GetCSV
                .getCsv(
                    fs, source, outputPath,
                    "eu.dnetlib.dhp.common.collection.models.DOAJModel", ',');
        }

        int count = 0;
        try (BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(outputPath))))) {
            String line;
            while ((line = in.readLine()) != null) {
                DOAJModel doaj = OBJECT_MAPPER.readValue(line, DOAJModel.class);
                if (count == 0) {
                    Assertions.assertEquals("0001-3765", doaj.getIssn());
                    Assertions.assertEquals("1678-2690", doaj.getEissn());
                    Assertions.assertEquals("Anais da Academia Brasileira de Ciências", doaj.getJournalTitle());
                }
                if (count == 7904) {
                    Assertions.assertEquals("", doaj.getIssn());
                    Assertions.assertEquals("2055-7159", doaj.getEissn());
                    Assertions.assertEquals("BJR|case reports", doaj.getJournalTitle());
                }
                if (count == 16707) {
                    Assertions.assertEquals("", doaj.getIssn());
                    Assertions.assertEquals("2788-6298", doaj.getEissn());
                    Assertions
                        .assertEquals(
                            "Teacher Education through Flexible Learning in Africa", doaj.getJournalTitle());
                }
                count += 1;
            }
        }

        Assertions.assertEquals(16713, count);
    }
}

View File

@ -1,80 +0,0 @@
package eu.dnetlib.dhp.common.collection.models;
import java.io.Serializable;
import com.opencsv.bean.CsvBindByName;
import com.opencsv.bean.CsvIgnore;
/**
 * Bean describing one row of the programme CSV file.
 *
 * <p>The {@code code}, {@code title}, {@code shortTitle} and {@code language} properties are bound by
 * name to the CSV columns; {@code classification} and {@code classification_short} are marked with
 * {@code @CsvIgnore} and are therefore not bound to any column.
 */
public class CSVProgramme implements Serializable {

    @CsvBindByName(column = "code")
    private String code;

    @CsvBindByName(column = "title")
    private String title;

    @CsvBindByName(column = "shortTitle")
    private String shortTitle;

    @CsvBindByName(column = "language")
    private String language;

    @CsvIgnore
    private String classification;

    @CsvIgnore
    private String classification_short;

    /** @return the programme code */
    public String getCode() {
        return this.code;
    }

    /** @param code the programme code */
    public void setCode(String code) {
        this.code = code;
    }

    /** @return the programme title */
    public String getTitle() {
        return this.title;
    }

    /** @param title the programme title */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the short form of the programme title */
    public String getShortTitle() {
        return this.shortTitle;
    }

    /** @param shortTitle the short form of the programme title */
    public void setShortTitle(String shortTitle) {
        this.shortTitle = shortTitle;
    }

    /** @return the language of the row */
    public String getLanguage() {
        return this.language;
    }

    /** @param language the language of the row */
    public void setLanguage(String language) {
        this.language = language;
    }

    /** @return the classification value, which is not read from the CSV */
    public String getClassification() {
        return this.classification;
    }

    /** @param classification the classification value */
    public void setClassification(String classification) {
        this.classification = classification;
    }

    /** @return the short classification value, which is not read from the CSV */
    public String getClassification_short() {
        return this.classification_short;
    }

    /** @param classification_short the short classification value */
    public void setClassification_short(String classification_short) {
        this.classification_short = classification_short;
    }
}

View File

@ -1,46 +0,0 @@
package eu.dnetlib.dhp.common.collection.models;
import java.io.Serializable;
import com.opencsv.bean.CsvBindByName;
/**
 * Bean describing one row of the projects CSV file; every property is bound by name to the CSV
 * column of the same name.
 */
public class CSVProject implements Serializable {

    @CsvBindByName(column = "id")
    private String id;

    @CsvBindByName(column = "programme")
    private String programme;

    @CsvBindByName(column = "topics")
    private String topics;

    /** @return the value of the {@code id} column */
    public String getId() {
        return this.id;
    }

    /** @param id the value of the {@code id} column */
    public void setId(String id) {
        this.id = id;
    }

    /** @return the value of the {@code programme} column */
    public String getProgramme() {
        return this.programme;
    }

    /** @param programme the value of the {@code programme} column */
    public void setProgramme(String programme) {
        this.programme = programme;
    }

    /** @return the value of the {@code topics} column */
    public String getTopics() {
        return this.topics;
    }

    /** @param topics the value of the {@code topics} column */
    public void setTopics(String topics) {
        this.topics = topics;
    }
}

View File

@ -1,52 +0,0 @@
package eu.dnetlib.dhp.common.collection.models;
import java.io.Serializable;
import com.opencsv.bean.CsvBindByName;
/**
 * Bean describing one row of the DOAJ journal CSV file; each property is bound by name to the CSV
 * column given in its {@code @CsvBindByName} annotation.
 */
public class DOAJModel implements Serializable {

    @CsvBindByName(column = "Journal title")
    private String journalTitle;

    @CsvBindByName(column = "Journal ISSN (print version)")
    private String issn;

    @CsvBindByName(column = "Journal EISSN (online version)")
    private String eissn;

    @CsvBindByName(column = "Review process")
    private String reviewProcess;

    /** @return the value of the "Journal title" column */
    public String getJournalTitle() {
        return this.journalTitle;
    }

    /** @param journalTitle the value of the "Journal title" column */
    public void setJournalTitle(String journalTitle) {
        this.journalTitle = journalTitle;
    }

    /** @return the value of the "Journal ISSN (print version)" column */
    public String getIssn() {
        return this.issn;
    }

    /** @param issn the value of the "Journal ISSN (print version)" column */
    public void setIssn(String issn) {
        this.issn = issn;
    }

    /** @return the value of the "Journal EISSN (online version)" column */
    public String getEissn() {
        return this.eissn;
    }

    /** @param eissn the value of the "Journal EISSN (online version)" column */
    public void setEissn(String eissn) {
        this.eissn = eissn;
    }

    /** @return the value of the "Review process" column */
    public String getReviewProcess() {
        return this.reviewProcess;
    }

    /** @param reviewProcess the value of the "Review process" column */
    public void setReviewProcess(String reviewProcess) {
        this.reviewProcess = reviewProcess;
    }
}

View File

@ -1,45 +0,0 @@
package eu.dnetlib.dhp.common.collection.models;
import java.io.Serializable;
import com.opencsv.bean.CsvBindByName;
/**
 * Bean describing one row of the Bielefeld gold open access ISSN CSV file; each property is bound by
 * name to the CSV column given in its {@code @CsvBindByName} annotation.
 */
public class UnibiGoldModel implements Serializable {

    @CsvBindByName(column = "ISSN")
    private String issn;

    @CsvBindByName(column = "ISSN_L")
    private String issn_l;

    @CsvBindByName(column = "TITLE")
    private String title;

    @CsvBindByName(column = "TITLE_SOURCE")
    private String title_source;

    /** @return the value of the {@code ISSN} column */
    public String getIssn() {
        return issn;
    }

    /** @param issn the value of the {@code ISSN} column */
    public void setIssn(String issn) {
        this.issn = issn;
    }

    /** @return the value of the {@code ISSN_L} column */
    public String getIssn_l() {
        return issn_l;
    }

    /**
     * Setter added so that {@code issn_l} has the same getter/setter pair as every other property
     * of this bean.
     *
     * @param issn_l the value of the {@code ISSN_L} column
     */
    public void setIssn_l(String issn_l) {
        this.issn_l = issn_l;
    }

    /** @return the value of the {@code TITLE} column */
    public String getTitle() {
        return title;
    }

    /** @param title the value of the {@code TITLE} column */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the value of the {@code TITLE_SOURCE} column */
    public String getTitle_source() {
        return title_source;
    }

    /** @param title_source the value of the {@code TITLE_SOURCE} column */
    public void setTitle_source(String title_source) {
        this.title_source = title_source;
    }
}

View File

@ -0,0 +1,145 @@
package eu.dnetlib.dhp.actionmanager.project;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProgramme;
import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject;
import eu.dnetlib.dhp.common.collection.CollectorException;
import eu.dnetlib.dhp.common.collection.GetCSV;
import eu.dnetlib.dhp.common.collection.HttpConnector2;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.jupiter.api.*;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.file.Files;
import static org.junit.jupiter.api.Assertions.assertTrue;
/**
 * Integration tests for downloading the CORDIS H2020 programme and project CSV files through
 * {@link GetCSV}.
 *
 * <p>Each test feeds the remote CSV to {@code GetCSV.getCsv}, then reads the produced file line by
 * line from the local Hadoop file system, parsing every line as JSON into the expected model class
 * and checking a sample of records plus the total record count.
 *
 * <p>Both tests are {@code @Disabled}: they need network access and assert on the content of live,
 * changing datasets.
 */
public class DownloadCsvTest {

    /** Shared JSON mapper, reused instead of building a new instance for every parsed line. */
    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    /** Temporary directory that receives every file written by the tests. */
    private static String workingDir;

    /** Local Hadoop file system used both by {@code GetCSV} and to read its output back. */
    private static LocalFileSystem fs;

    /**
     * Creates the temporary working directory and the local file system shared by all tests.
     *
     * @throws IOException if the temporary directory or the file system cannot be created
     */
    @BeforeAll
    public static void beforeAll() throws IOException {
        workingDir = Files
            .createTempDirectory(DownloadCsvTest.class.getSimpleName())
            .toString();
        fs = FileSystem.getLocal(new Configuration());
    }

    /**
     * Downloads the H2020 programmes CSV and checks three sample records and the total count.
     *
     * @throws Exception on any download, conversion or parsing failure
     */
    @Disabled
    @Test
    void getProgrammeFileTest() throws Exception {
        String fileURL = "https://cordis.europa.eu/data/reference/cordisref-h2020programmes.csv";
        String outputPath = workingDir + "/programme";

        try (BufferedReader source = new BufferedReader(
            new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL)))) {
            GetCSV.getCsv(fs, source, outputPath, CSVProgramme.class.getName(), ';');
        }

        int count = 0;
        try (BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(outputPath))))) {
            String line;
            while ((line = in.readLine()) != null) {
                CSVProgramme csvp = OBJECT_MAPPER.readValue(line, CSVProgramme.class);
                if (count == 0) {
                    Assertions.assertEquals("H2020-EU.5.f.", csvp.getCode());
                    assertTrue(
                        csvp
                            .getTitle()
                            .startsWith(
                                "Develop the governance for the advancement of responsible research and innovation by all stakeholders"));
                    assertTrue(csvp.getTitle().endsWith("promote an ethics framework for research and innovation"));
                    Assertions.assertEquals("", csvp.getShortTitle());
                    Assertions.assertEquals("en", csvp.getLanguage());
                }
                if (count == 28) {
                    Assertions.assertEquals("H2020-EU.3.5.4.", csvp.getCode());
                    Assertions
                        .assertEquals(
                            "Grundlagen für den Übergang zu einer umweltfreundlichen Wirtschaft und Gesellschaft durch Öko-Innovation",
                            csvp.getTitle());
                    Assertions
                        .assertEquals("A green economy and society through eco-innovation", csvp.getShortTitle());
                    Assertions.assertEquals("de", csvp.getLanguage());
                }
                if (count == 229) {
                    Assertions.assertEquals("H2020-EU.3.2.", csvp.getCode());
                    Assertions
                        .assertEquals(
                            "SOCIETAL CHALLENGES - Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy",
                            csvp.getTitle());
                    Assertions
                        .assertEquals(
                            "Food, agriculture, forestry, marine research and bioeconomy", csvp.getShortTitle());
                    Assertions.assertEquals("en", csvp.getLanguage());
                }
                // Every record, not only the sampled ones, must carry an H2020 code.
                Assertions.assertNotNull(csvp.getCode());
                assertTrue(csvp.getCode().startsWith("H2020"));
                count += 1;
            }
        }

        Assertions.assertEquals(767, count);
    }

    /**
     * Downloads the H2020 projects CSV and checks sample records and the total count.
     *
     * @throws IOException            on download or file system failures
     * @throws CollectorException     if the remote source cannot be collected
     * @throws ClassNotFoundException if the model class name cannot be resolved
     */
    @Disabled
    @Test
    void getProjectFileTest() throws IOException, CollectorException, ClassNotFoundException {
        String fileURL = "https://cordis.europa.eu/data/cordis-h2020projects.csv";
        String outputPath = workingDir + "/projects";

        try (BufferedReader source = new BufferedReader(
            new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL)))) {
            GetCSV.getCsv(fs, source, outputPath, CSVProject.class.getName(), ';');
        }

        int count = 0;
        try (BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(outputPath))))) {
            String line;
            while ((line = in.readLine()) != null) {
                CSVProject csvp = OBJECT_MAPPER.readValue(line, CSVProject.class);
                if (count == 0) {
                    Assertions.assertEquals("771736", csvp.getId());
                    Assertions.assertEquals("H2020-EU.1.1.", csvp.getProgramme());
                    Assertions.assertEquals("ERC-2017-COG", csvp.getTopics());
                }
                if (count == 22882) {
                    Assertions.assertEquals("752903", csvp.getId());
                    Assertions.assertEquals("H2020-EU.1.3.2.", csvp.getProgramme());
                    Assertions.assertEquals("MSCA-IF-2016", csvp.getTopics());
                }
                // NOTE(review): index 223023 is larger than the expected total of 34957 records asserted
                // below, so this branch never executes - confirm the intended row index.
                if (count == 223023) {
                    Assertions.assertEquals("861952", csvp.getId());
                    Assertions.assertEquals("H2020-EU.4.e.", csvp.getProgramme());
                    Assertions.assertEquals("SGA-SEWP-COST-2019", csvp.getTopics());
                }
                Assertions.assertNotNull(csvp.getId());
                assertTrue(csvp.getProgramme().startsWith("H2020"));
                count += 1;
            }
        }

        Assertions.assertEquals(34957, count);
    }

    /** Removes the temporary working directory, ignoring any deletion failure. */
    @AfterAll
    public static void cleanup() {
        FileUtils.deleteQuietly(new File(workingDir));
    }
}

View File

@ -0,0 +1,139 @@
package eu.dnetlib.dhp.oa.graph.hostedbymap;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.common.collection.CollectorException;
import eu.dnetlib.dhp.common.collection.GetCSV;
import eu.dnetlib.dhp.common.collection.HttpConnector2;
import eu.dnetlib.dhp.oa.graph.hostedbymap.model.DOAJModel;
import eu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.jupiter.api.*;
import java.io.*;
import java.nio.file.Files;
import static org.junit.jupiter.api.Assertions.assertTrue;
public class DownloadCsvTest {
private static String workingDir;
private static LocalFileSystem fs;
@BeforeAll
public static void beforeAll() throws IOException {
workingDir = Files
.createTempDirectory(DownloadCsvTest.class.getSimpleName())
.toString();
fs = FileSystem.getLocal(new Configuration());
}
@Disabled
@Test
void getUnibiFileTest() throws CollectorException, IOException, ClassNotFoundException {
String fileURL = "https://pub.uni-bielefeld.de/download/2944717/2944718/issn_gold_oa_version_4.csv";
GetCSV
.getCsv(
fs, new BufferedReader(
new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))),
workingDir + "/programme",
UnibiGoldModel.class.getName());
BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme"))));
String line;
int count = 0;
while ((line = in.readLine()) != null) {
UnibiGoldModel unibi = new ObjectMapper().readValue(line, UnibiGoldModel.class);
if (count == 0) {
assertTrue(unibi.getIssn().equals("0001-625X"));
assertTrue(unibi.getIssnL().equals("0001-625X"));
assertTrue(unibi.getTitle().equals("Acta Mycologica"));
}
if (count == 43158) {
assertTrue(unibi.getIssn().equals("2088-6330"));
assertTrue(unibi.getIssnL().equals("2088-6330"));
assertTrue(unibi.getTitle().equals("Religió: Jurnal Studi Agama-agama"));
}
if (count == 67027) {
assertTrue(unibi.getIssn().equals("2658-7068"));
assertTrue(unibi.getIssnL().equals("2308-2488"));
assertTrue(unibi.getTitle().equals("Istoriko-èkonomičeskie issledovaniâ."));
}
count += 1;
}
Assertions.assertEquals(67028, count);
}
@Disabled
@Test
void getDoajFileTest() throws CollectorException, IOException, ClassNotFoundException {
String fileURL = "https://doaj.org/csv";
try (BufferedReader in = new BufferedReader(
new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL)))) {
try (PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("/tmp/DOAJ_1.csv")))) {
String line;
while ((line = in.readLine()) != null) {
writer.println(line.replace("\\\"", "\""));
}
}
}
GetCSV
.getCsv(
fs, new BufferedReader(
new FileReader("/tmp/DOAJ_1.csv")),
workingDir + "/programme",
DOAJModel.class.getName());
BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme"))));
String line;
int count = 0;
while ((line = in.readLine()) != null) {
DOAJModel doaj = new ObjectMapper().readValue(line, DOAJModel.class);
if (count == 0) {
Assertions.assertEquals("0001-3765", doaj.getIssn());
Assertions.assertEquals("1678-2690", doaj.getEissn());
Assertions.assertEquals("Anais da Academia Brasileira de Ciências", doaj.getJournalTitle());
}
if (count == 7904) {
System.out.println(new ObjectMapper().writeValueAsString(doaj));
Assertions.assertEquals("", doaj.getIssn());
Assertions.assertEquals("2055-7159", doaj.getEissn());
Assertions.assertEquals("BJR|case reports", doaj.getJournalTitle());
}
if (count == 16707) {
Assertions.assertEquals("", doaj.getIssn());
Assertions.assertEquals("2788-6298", doaj.getEissn());
Assertions
.assertEquals("Teacher Education through Flexible Learning in Africa", doaj.getJournalTitle());
}
count += 1;
}
Assertions.assertEquals(16713, count);
}
@AfterAll
public static void cleanup() {
FileUtils.deleteQuietly(new File(workingDir));
}
}