forked from antonis.lempesis/dnet-hadoop
test classes moved in specific components
This commit is contained in:
parent
3359f73fcf
commit
baed5e3337
|
@ -1,246 +0,0 @@
|
|||
|
||||
package eu.dnetlib.dhp.common.collection;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.file.Files;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.LocalFileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.Disabled;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.common.collection.models.CSVProgramme;
|
||||
import eu.dnetlib.dhp.common.collection.models.CSVProject;
|
||||
import eu.dnetlib.dhp.common.collection.models.DOAJModel;
|
||||
import eu.dnetlib.dhp.common.collection.models.UnibiGoldModel;
|
||||
import jdk.nashorn.internal.ir.annotations.Ignore;
|
||||
|
||||
public class GetCSVTest {
|
||||
|
||||
private static String workingDir;
|
||||
|
||||
private static LocalFileSystem fs;
|
||||
|
||||
@Disabled
|
||||
@Test
|
||||
void getProgrammeFileTest() throws Exception {
|
||||
|
||||
String fileURL = "https://cordis.europa.eu/data/reference/cordisref-h2020programmes.csv";
|
||||
|
||||
GetCSV
|
||||
.getCsv(
|
||||
fs, new BufferedReader(
|
||||
new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))),
|
||||
workingDir + "/programme",
|
||||
"eu.dnetlib.dhp.common.collection.models.CSVProgramme", ';');
|
||||
|
||||
BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme"))));
|
||||
|
||||
String line;
|
||||
int count = 0;
|
||||
while ((line = in.readLine()) != null) {
|
||||
CSVProgramme csvp = new ObjectMapper().readValue(line, CSVProgramme.class);
|
||||
if (count == 0) {
|
||||
Assertions.assertTrue(csvp.getCode().equals("H2020-EU.5.f."));
|
||||
Assertions
|
||||
.assertTrue(
|
||||
csvp
|
||||
.getTitle()
|
||||
.startsWith(
|
||||
"Develop the governance for the advancement of responsible research and innovation by all stakeholders"));
|
||||
Assertions
|
||||
.assertTrue(csvp.getTitle().endsWith("promote an ethics framework for research and innovation"));
|
||||
Assertions.assertTrue(csvp.getShortTitle().equals(""));
|
||||
Assertions.assertTrue(csvp.getLanguage().equals("en"));
|
||||
}
|
||||
if (count == 28) {
|
||||
Assertions.assertTrue(csvp.getCode().equals("H2020-EU.3.5.4."));
|
||||
Assertions
|
||||
.assertTrue(
|
||||
csvp
|
||||
.getTitle()
|
||||
.equals(
|
||||
"Grundlagen für den Übergang zu einer umweltfreundlichen Wirtschaft und Gesellschaft durch Öko-Innovation"));
|
||||
Assertions
|
||||
.assertTrue(csvp.getShortTitle().equals("A green economy and society through eco-innovation"));
|
||||
Assertions.assertTrue(csvp.getLanguage().equals("de"));
|
||||
}
|
||||
if (count == 229) {
|
||||
Assertions.assertTrue(csvp.getCode().equals("H2020-EU.3.2."));
|
||||
Assertions
|
||||
.assertTrue(
|
||||
csvp
|
||||
.getTitle()
|
||||
.equals(
|
||||
"SOCIETAL CHALLENGES - Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy"));
|
||||
Assertions
|
||||
.assertTrue(
|
||||
csvp.getShortTitle().equals("Food, agriculture, forestry, marine research and bioeconomy"));
|
||||
Assertions.assertTrue(csvp.getLanguage().equals("en"));
|
||||
}
|
||||
Assertions.assertTrue(csvp.getCode() != null);
|
||||
Assertions.assertTrue(csvp.getCode().startsWith("H2020"));
|
||||
count += 1;
|
||||
}
|
||||
|
||||
Assertions.assertEquals(767, count);
|
||||
}
|
||||
|
||||
@BeforeAll
|
||||
public static void beforeAll() throws IOException {
|
||||
workingDir = Files
|
||||
.createTempDirectory(GetCSVTest.class.getSimpleName())
|
||||
.toString();
|
||||
|
||||
fs = FileSystem.getLocal(new Configuration());
|
||||
}
|
||||
|
||||
@Disabled
|
||||
@Test
|
||||
void getProjectFileTest() throws IOException, CollectorException, ClassNotFoundException {
|
||||
String fileURL = "https://cordis.europa.eu/data/cordis-h2020projects.csv";
|
||||
// String fileURL = "/Users/miriam.baglioni/Downloads/cordis-h2020projects.csv";
|
||||
|
||||
GetCSV
|
||||
.getCsv(
|
||||
fs,
|
||||
new BufferedReader(new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL)))
|
||||
// new BufferedReader(new FileReader(fileURL))
|
||||
, workingDir + "/projects",
|
||||
"eu.dnetlib.dhp.common.collection.models.CSVProject", ';');
|
||||
|
||||
BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/projects"))));
|
||||
|
||||
String line;
|
||||
int count = 0;
|
||||
while ((line = in.readLine()) != null) {
|
||||
CSVProject csvp = new ObjectMapper().readValue(line, CSVProject.class);
|
||||
if (count == 0) {
|
||||
Assertions.assertTrue(csvp.getId().equals("771736"));
|
||||
Assertions.assertTrue(csvp.getProgramme().equals("H2020-EU.1.1."));
|
||||
Assertions.assertTrue(csvp.getTopics().equals("ERC-2017-COG"));
|
||||
|
||||
}
|
||||
if (count == 22882) {
|
||||
Assertions.assertTrue(csvp.getId().equals("752903"));
|
||||
Assertions.assertTrue(csvp.getProgramme().equals("H2020-EU.1.3.2."));
|
||||
Assertions.assertTrue(csvp.getTopics().equals("MSCA-IF-2016"));
|
||||
}
|
||||
if (count == 223023) {
|
||||
Assertions.assertTrue(csvp.getId().equals("861952"));
|
||||
Assertions.assertTrue(csvp.getProgramme().equals("H2020-EU.4.e."));
|
||||
Assertions.assertTrue(csvp.getTopics().equals("SGA-SEWP-COST-2019"));
|
||||
}
|
||||
Assertions.assertTrue(csvp.getId() != null);
|
||||
Assertions.assertTrue(csvp.getProgramme().startsWith("H2020"));
|
||||
count += 1;
|
||||
}
|
||||
|
||||
Assertions.assertEquals(34957, count);
|
||||
}
|
||||
|
||||
@Disabled
|
||||
@Test
|
||||
void getUnibiFileTest() throws CollectorException, IOException, ClassNotFoundException {
|
||||
|
||||
String fileURL = "https://pub.uni-bielefeld.de/download/2944717/2944718/issn_gold_oa_version_4.csv";
|
||||
|
||||
GetCSV
|
||||
.getCsv(
|
||||
fs, new BufferedReader(
|
||||
new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))),
|
||||
workingDir + "/programme",
|
||||
"eu.dnetlib.dhp.common.collection.models.UnibiGoldModel", ',');
|
||||
|
||||
BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme"))));
|
||||
|
||||
String line;
|
||||
int count = 0;
|
||||
while ((line = in.readLine()) != null) {
|
||||
UnibiGoldModel unibi = new ObjectMapper().readValue(line, UnibiGoldModel.class);
|
||||
if (count == 0) {
|
||||
Assertions.assertTrue(unibi.getIssn().equals("0001-625X"));
|
||||
Assertions.assertTrue(unibi.getIssn_l().equals("0001-625X"));
|
||||
Assertions.assertTrue(unibi.getTitle().equals("Acta Mycologica"));
|
||||
|
||||
}
|
||||
if (count == 43158) {
|
||||
Assertions.assertTrue(unibi.getIssn().equals("2088-6330"));
|
||||
Assertions.assertTrue(unibi.getIssn_l().equals("2088-6330"));
|
||||
Assertions.assertTrue(unibi.getTitle().equals("Religió: Jurnal Studi Agama-agama"));
|
||||
|
||||
}
|
||||
if (count == 67027) {
|
||||
Assertions.assertTrue(unibi.getIssn().equals("2658-7068"));
|
||||
Assertions.assertTrue(unibi.getIssn_l().equals("2308-2488"));
|
||||
Assertions.assertTrue(unibi.getTitle().equals("Istoriko-èkonomičeskie issledovaniâ."));
|
||||
}
|
||||
|
||||
count += 1;
|
||||
}
|
||||
|
||||
Assertions.assertEquals(67028, count);
|
||||
}
|
||||
|
||||
@Disabled
|
||||
@Test
|
||||
void getDoajFileTest() throws CollectorException, IOException, ClassNotFoundException {
|
||||
|
||||
String fileURL = "https://doaj.org/csv";
|
||||
|
||||
try (BufferedReader in = new BufferedReader(
|
||||
new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL)))) {
|
||||
try (PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("/tmp/DOAJ_1.csv")))) {
|
||||
String line;
|
||||
while ((line = in.readLine()) != null) {
|
||||
writer.println(line.replace("\\\"", "\""));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
GetCSV
|
||||
.getCsv(
|
||||
fs, new BufferedReader(
|
||||
new FileReader("/tmp/DOAJ_1.csv")),
|
||||
workingDir + "/programme",
|
||||
"eu.dnetlib.dhp.common.collection.models.DOAJModel", ',');
|
||||
|
||||
BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme"))));
|
||||
|
||||
String line;
|
||||
int count = 0;
|
||||
while ((line = in.readLine()) != null) {
|
||||
DOAJModel doaj = new ObjectMapper().readValue(line, DOAJModel.class);
|
||||
if (count == 0) {
|
||||
Assertions.assertEquals("0001-3765", doaj.getIssn());
|
||||
Assertions.assertEquals("1678-2690", doaj.getEissn());
|
||||
Assertions.assertEquals("Anais da Academia Brasileira de Ciências", doaj.getJournalTitle());
|
||||
|
||||
}
|
||||
if (count == 7904) {
|
||||
System.out.println(new ObjectMapper().writeValueAsString(doaj));
|
||||
Assertions.assertEquals("", doaj.getIssn());
|
||||
Assertions.assertEquals("2055-7159", doaj.getEissn());
|
||||
Assertions.assertEquals("BJR|case reports", doaj.getJournalTitle());
|
||||
}
|
||||
if (count == 16707) {
|
||||
|
||||
Assertions.assertEquals("", doaj.getIssn());
|
||||
Assertions.assertEquals("2788-6298", doaj.getEissn());
|
||||
Assertions
|
||||
.assertEquals("Teacher Education through Flexible Learning in Africa", doaj.getJournalTitle());
|
||||
}
|
||||
|
||||
count += 1;
|
||||
}
|
||||
|
||||
Assertions.assertEquals(16713, count);
|
||||
}
|
||||
|
||||
}
|
|
@ -1,80 +0,0 @@
|
|||
|
||||
package eu.dnetlib.dhp.common.collection.models;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
import com.opencsv.bean.CsvBindByName;
|
||||
import com.opencsv.bean.CsvIgnore;
|
||||
|
||||
/**
|
||||
* The model for the programme csv file
|
||||
*/
|
||||
public class CSVProgramme implements Serializable {
|
||||
|
||||
@CsvBindByName(column = "code")
|
||||
private String code;
|
||||
|
||||
@CsvBindByName(column = "title")
|
||||
private String title;
|
||||
|
||||
@CsvBindByName(column = "shortTitle")
|
||||
private String shortTitle;
|
||||
|
||||
@CsvBindByName(column = "language")
|
||||
private String language;
|
||||
|
||||
@CsvIgnore
|
||||
private String classification;
|
||||
|
||||
@CsvIgnore
|
||||
private String classification_short;
|
||||
|
||||
public String getClassification_short() {
|
||||
return classification_short;
|
||||
}
|
||||
|
||||
public void setClassification_short(String classification_short) {
|
||||
this.classification_short = classification_short;
|
||||
}
|
||||
|
||||
public String getClassification() {
|
||||
return classification;
|
||||
}
|
||||
|
||||
public void setClassification(String classification) {
|
||||
this.classification = classification;
|
||||
}
|
||||
|
||||
public String getCode() {
|
||||
return code;
|
||||
}
|
||||
|
||||
public void setCode(String code) {
|
||||
this.code = code;
|
||||
}
|
||||
|
||||
public String getTitle() {
|
||||
return title;
|
||||
}
|
||||
|
||||
public void setTitle(String title) {
|
||||
this.title = title;
|
||||
}
|
||||
|
||||
public String getShortTitle() {
|
||||
return shortTitle;
|
||||
}
|
||||
|
||||
public void setShortTitle(String shortTitle) {
|
||||
this.shortTitle = shortTitle;
|
||||
}
|
||||
|
||||
public String getLanguage() {
|
||||
return language;
|
||||
}
|
||||
|
||||
public void setLanguage(String language) {
|
||||
this.language = language;
|
||||
}
|
||||
|
||||
}
|
|
@ -1,46 +0,0 @@
|
|||
|
||||
package eu.dnetlib.dhp.common.collection.models;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
import com.opencsv.bean.CsvBindByName;
|
||||
|
||||
/**
|
||||
* the mmodel for the projects csv file
|
||||
*/
|
||||
public class CSVProject implements Serializable {
|
||||
|
||||
@CsvBindByName(column = "id")
|
||||
private String id;
|
||||
|
||||
@CsvBindByName(column = "programme")
|
||||
private String programme;
|
||||
|
||||
@CsvBindByName(column = "topics")
|
||||
private String topics;
|
||||
|
||||
public String getId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
public void setId(String id) {
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
public String getProgramme() {
|
||||
return programme;
|
||||
}
|
||||
|
||||
public void setProgramme(String programme) {
|
||||
this.programme = programme;
|
||||
}
|
||||
|
||||
public String getTopics() {
|
||||
return topics;
|
||||
}
|
||||
|
||||
public void setTopics(String topics) {
|
||||
this.topics = topics;
|
||||
}
|
||||
|
||||
}
|
|
@ -1,52 +0,0 @@
|
|||
|
||||
package eu.dnetlib.dhp.common.collection.models;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
import com.opencsv.bean.CsvBindByName;
|
||||
|
||||
public class DOAJModel implements Serializable {
|
||||
@CsvBindByName(column = "Journal title")
|
||||
private String journalTitle;
|
||||
|
||||
@CsvBindByName(column = "Journal ISSN (print version)")
|
||||
private String issn;
|
||||
|
||||
@CsvBindByName(column = "Journal EISSN (online version)")
|
||||
private String eissn;
|
||||
|
||||
@CsvBindByName(column = "Review process")
|
||||
private String reviewProcess;
|
||||
|
||||
public String getJournalTitle() {
|
||||
return journalTitle;
|
||||
}
|
||||
|
||||
public void setJournalTitle(String journalTitle) {
|
||||
this.journalTitle = journalTitle;
|
||||
}
|
||||
|
||||
public String getIssn() {
|
||||
return issn;
|
||||
}
|
||||
|
||||
public void setIssn(String issn) {
|
||||
this.issn = issn;
|
||||
}
|
||||
|
||||
public String getEissn() {
|
||||
return eissn;
|
||||
}
|
||||
|
||||
public void setEissn(String eissn) {
|
||||
this.eissn = eissn;
|
||||
}
|
||||
|
||||
public String getReviewProcess() {
|
||||
return reviewProcess;
|
||||
}
|
||||
|
||||
public void setReviewProcess(String reviewProcess) {
|
||||
this.reviewProcess = reviewProcess;
|
||||
}
|
||||
}
|
|
@ -1,45 +0,0 @@
|
|||
|
||||
package eu.dnetlib.dhp.common.collection.models;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
import com.opencsv.bean.CsvBindByName;
|
||||
|
||||
public class UnibiGoldModel implements Serializable {
|
||||
@CsvBindByName(column = "ISSN")
|
||||
private String issn;
|
||||
@CsvBindByName(column = "ISSN_L")
|
||||
private String issn_l;
|
||||
@CsvBindByName(column = "TITLE")
|
||||
private String title;
|
||||
@CsvBindByName(column = "TITLE_SOURCE")
|
||||
private String title_source;
|
||||
|
||||
public String getIssn() {
|
||||
return issn;
|
||||
}
|
||||
|
||||
public void setIssn(String issn) {
|
||||
this.issn = issn;
|
||||
}
|
||||
|
||||
public String getIssn_l() {
|
||||
return issn_l;
|
||||
}
|
||||
|
||||
public String getTitle() {
|
||||
return title;
|
||||
}
|
||||
|
||||
public void setTitle(String title) {
|
||||
this.title = title;
|
||||
}
|
||||
|
||||
public String getTitle_source() {
|
||||
return title_source;
|
||||
}
|
||||
|
||||
public void setTitle_source(String title_source) {
|
||||
this.title_source = title_source;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,145 @@
|
|||
package eu.dnetlib.dhp.actionmanager.project;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProgramme;
|
||||
import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject;
|
||||
import eu.dnetlib.dhp.common.collection.CollectorException;
|
||||
import eu.dnetlib.dhp.common.collection.GetCSV;
|
||||
import eu.dnetlib.dhp.common.collection.HttpConnector2;
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.LocalFileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.junit.jupiter.api.*;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.nio.file.Files;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
public class DownloadCsvTest {
|
||||
|
||||
private static String workingDir;
|
||||
|
||||
private static LocalFileSystem fs;
|
||||
|
||||
@BeforeAll
|
||||
public static void beforeAll() throws IOException {
|
||||
workingDir = Files
|
||||
.createTempDirectory(DownloadCsvTest.class.getSimpleName())
|
||||
.toString();
|
||||
|
||||
fs = FileSystem.getLocal(new Configuration());
|
||||
}
|
||||
|
||||
@Disabled
|
||||
@Test
|
||||
void getProgrammeFileTest() throws Exception {
|
||||
|
||||
String fileURL = "https://cordis.europa.eu/data/reference/cordisref-h2020programmes.csv";
|
||||
|
||||
GetCSV
|
||||
.getCsv(
|
||||
fs, new BufferedReader(
|
||||
new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))),
|
||||
workingDir + "/programme",
|
||||
CSVProgramme.class.getName(), ';');
|
||||
|
||||
BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme"))));
|
||||
|
||||
String line;
|
||||
int count = 0;
|
||||
while ((line = in.readLine()) != null) {
|
||||
CSVProgramme csvp = new ObjectMapper().readValue(line, CSVProgramme.class);
|
||||
if (count == 0) {
|
||||
assertTrue(csvp.getCode().equals("H2020-EU.5.f."));
|
||||
assertTrue(
|
||||
csvp
|
||||
.getTitle()
|
||||
.startsWith(
|
||||
"Develop the governance for the advancement of responsible research and innovation by all stakeholders"));
|
||||
assertTrue(csvp.getTitle().endsWith("promote an ethics framework for research and innovation"));
|
||||
assertTrue(csvp.getShortTitle().equals(""));
|
||||
assertTrue(csvp.getLanguage().equals("en"));
|
||||
}
|
||||
if (count == 28) {
|
||||
assertTrue(csvp.getCode().equals("H2020-EU.3.5.4."));
|
||||
assertTrue(
|
||||
csvp
|
||||
.getTitle()
|
||||
.equals(
|
||||
"Grundlagen für den Übergang zu einer umweltfreundlichen Wirtschaft und Gesellschaft durch Öko-Innovation"));
|
||||
assertTrue(csvp.getShortTitle().equals("A green economy and society through eco-innovation"));
|
||||
assertTrue(csvp.getLanguage().equals("de"));
|
||||
}
|
||||
if (count == 229) {
|
||||
assertTrue(csvp.getCode().equals("H2020-EU.3.2."));
|
||||
assertTrue(
|
||||
csvp
|
||||
.getTitle()
|
||||
.equals(
|
||||
"SOCIETAL CHALLENGES - Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy"));
|
||||
assertTrue(
|
||||
csvp.getShortTitle().equals("Food, agriculture, forestry, marine research and bioeconomy"));
|
||||
assertTrue(csvp.getLanguage().equals("en"));
|
||||
}
|
||||
assertTrue(csvp.getCode() != null);
|
||||
assertTrue(csvp.getCode().startsWith("H2020"));
|
||||
count += 1;
|
||||
}
|
||||
|
||||
Assertions.assertEquals(767, count);
|
||||
}
|
||||
|
||||
@Disabled
|
||||
@Test
|
||||
void getProjectFileTest() throws IOException, CollectorException, ClassNotFoundException {
|
||||
String fileURL = "https://cordis.europa.eu/data/cordis-h2020projects.csv";
|
||||
|
||||
GetCSV
|
||||
.getCsv(
|
||||
fs,
|
||||
new BufferedReader(new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL)))
|
||||
, workingDir + "/projects",
|
||||
CSVProject.class.getName(), ';');
|
||||
|
||||
BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/projects"))));
|
||||
|
||||
String line;
|
||||
int count = 0;
|
||||
while ((line = in.readLine()) != null) {
|
||||
CSVProject csvp = new ObjectMapper().readValue(line, CSVProject.class);
|
||||
if (count == 0) {
|
||||
assertTrue(csvp.getId().equals("771736"));
|
||||
assertTrue(csvp.getProgramme().equals("H2020-EU.1.1."));
|
||||
assertTrue(csvp.getTopics().equals("ERC-2017-COG"));
|
||||
|
||||
}
|
||||
if (count == 22882) {
|
||||
assertTrue(csvp.getId().equals("752903"));
|
||||
assertTrue(csvp.getProgramme().equals("H2020-EU.1.3.2."));
|
||||
assertTrue(csvp.getTopics().equals("MSCA-IF-2016"));
|
||||
}
|
||||
if (count == 223023) {
|
||||
assertTrue(csvp.getId().equals("861952"));
|
||||
assertTrue(csvp.getProgramme().equals("H2020-EU.4.e."));
|
||||
assertTrue(csvp.getTopics().equals("SGA-SEWP-COST-2019"));
|
||||
}
|
||||
assertTrue(csvp.getId() != null);
|
||||
assertTrue(csvp.getProgramme().startsWith("H2020"));
|
||||
count += 1;
|
||||
}
|
||||
|
||||
Assertions.assertEquals(34957, count);
|
||||
}
|
||||
|
||||
@AfterAll
|
||||
public static void cleanup() {
|
||||
FileUtils.deleteQuietly(new File(workingDir));
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,139 @@
|
|||
package eu.dnetlib.dhp.oa.graph.hostedbymap;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import eu.dnetlib.dhp.common.collection.CollectorException;
|
||||
import eu.dnetlib.dhp.common.collection.GetCSV;
|
||||
import eu.dnetlib.dhp.common.collection.HttpConnector2;
|
||||
import eu.dnetlib.dhp.oa.graph.hostedbymap.model.DOAJModel;
|
||||
import eu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel;
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.LocalFileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.junit.jupiter.api.*;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.file.Files;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
public class DownloadCsvTest {
|
||||
|
||||
private static String workingDir;
|
||||
|
||||
private static LocalFileSystem fs;
|
||||
|
||||
@BeforeAll
|
||||
public static void beforeAll() throws IOException {
|
||||
workingDir = Files
|
||||
.createTempDirectory(DownloadCsvTest.class.getSimpleName())
|
||||
.toString();
|
||||
|
||||
fs = FileSystem.getLocal(new Configuration());
|
||||
}
|
||||
|
||||
@Disabled
|
||||
@Test
|
||||
void getUnibiFileTest() throws CollectorException, IOException, ClassNotFoundException {
|
||||
|
||||
String fileURL = "https://pub.uni-bielefeld.de/download/2944717/2944718/issn_gold_oa_version_4.csv";
|
||||
|
||||
GetCSV
|
||||
.getCsv(
|
||||
fs, new BufferedReader(
|
||||
new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL))),
|
||||
workingDir + "/programme",
|
||||
UnibiGoldModel.class.getName());
|
||||
|
||||
BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme"))));
|
||||
|
||||
String line;
|
||||
int count = 0;
|
||||
while ((line = in.readLine()) != null) {
|
||||
UnibiGoldModel unibi = new ObjectMapper().readValue(line, UnibiGoldModel.class);
|
||||
if (count == 0) {
|
||||
assertTrue(unibi.getIssn().equals("0001-625X"));
|
||||
assertTrue(unibi.getIssnL().equals("0001-625X"));
|
||||
assertTrue(unibi.getTitle().equals("Acta Mycologica"));
|
||||
|
||||
}
|
||||
if (count == 43158) {
|
||||
assertTrue(unibi.getIssn().equals("2088-6330"));
|
||||
assertTrue(unibi.getIssnL().equals("2088-6330"));
|
||||
assertTrue(unibi.getTitle().equals("Religió: Jurnal Studi Agama-agama"));
|
||||
|
||||
}
|
||||
if (count == 67027) {
|
||||
assertTrue(unibi.getIssn().equals("2658-7068"));
|
||||
assertTrue(unibi.getIssnL().equals("2308-2488"));
|
||||
assertTrue(unibi.getTitle().equals("Istoriko-èkonomičeskie issledovaniâ."));
|
||||
}
|
||||
|
||||
count += 1;
|
||||
}
|
||||
|
||||
Assertions.assertEquals(67028, count);
|
||||
}
|
||||
|
||||
@Disabled
|
||||
@Test
|
||||
void getDoajFileTest() throws CollectorException, IOException, ClassNotFoundException {
|
||||
|
||||
String fileURL = "https://doaj.org/csv";
|
||||
|
||||
try (BufferedReader in = new BufferedReader(
|
||||
new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL)))) {
|
||||
try (PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("/tmp/DOAJ_1.csv")))) {
|
||||
String line;
|
||||
while ((line = in.readLine()) != null) {
|
||||
writer.println(line.replace("\\\"", "\""));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
GetCSV
|
||||
.getCsv(
|
||||
fs, new BufferedReader(
|
||||
new FileReader("/tmp/DOAJ_1.csv")),
|
||||
workingDir + "/programme",
|
||||
DOAJModel.class.getName());
|
||||
|
||||
BufferedReader in = new BufferedReader(new InputStreamReader(fs.open(new Path(workingDir + "/programme"))));
|
||||
|
||||
String line;
|
||||
int count = 0;
|
||||
while ((line = in.readLine()) != null) {
|
||||
DOAJModel doaj = new ObjectMapper().readValue(line, DOAJModel.class);
|
||||
if (count == 0) {
|
||||
Assertions.assertEquals("0001-3765", doaj.getIssn());
|
||||
Assertions.assertEquals("1678-2690", doaj.getEissn());
|
||||
Assertions.assertEquals("Anais da Academia Brasileira de Ciências", doaj.getJournalTitle());
|
||||
|
||||
}
|
||||
if (count == 7904) {
|
||||
System.out.println(new ObjectMapper().writeValueAsString(doaj));
|
||||
Assertions.assertEquals("", doaj.getIssn());
|
||||
Assertions.assertEquals("2055-7159", doaj.getEissn());
|
||||
Assertions.assertEquals("BJR|case reports", doaj.getJournalTitle());
|
||||
}
|
||||
if (count == 16707) {
|
||||
|
||||
Assertions.assertEquals("", doaj.getIssn());
|
||||
Assertions.assertEquals("2788-6298", doaj.getEissn());
|
||||
Assertions
|
||||
.assertEquals("Teacher Education through Flexible Learning in Africa", doaj.getJournalTitle());
|
||||
}
|
||||
|
||||
count += 1;
|
||||
}
|
||||
|
||||
Assertions.assertEquals(16713, count);
|
||||
}
|
||||
|
||||
@AfterAll
|
||||
public static void cleanup() {
|
||||
FileUtils.deleteQuietly(new File(workingDir));
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue