diff --git a/dhp-common/pom.xml b/dhp-common/pom.xml
index 4c7810c47c..c057123b1d 100644
--- a/dhp-common/pom.xml
+++ b/dhp-common/pom.xml
@@ -117,6 +117,11 @@
eu.dnetlib.dhp
dhp-schemas
+
+
+ com.opencsv
+ opencsv
+
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregatorReport.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/aggregation/AggregatorReport.java
similarity index 96%
rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregatorReport.java
rename to dhp-common/src/main/java/eu/dnetlib/dhp/common/aggregation/AggregatorReport.java
index 8e46ab92b8..c5926848e7 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregatorReport.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/aggregation/AggregatorReport.java
@@ -1,5 +1,5 @@
-package eu.dnetlib.dhp.aggregation.common;
+package eu.dnetlib.dhp.common.aggregation;
import java.io.Closeable;
import java.io.IOException;
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorException.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/CollectorException.java
similarity index 93%
rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorException.java
rename to dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/CollectorException.java
index 144d297e62..5d94c2f89a 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorException.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/CollectorException.java
@@ -1,5 +1,5 @@
-package eu.dnetlib.dhp.collection;
+package eu.dnetlib.dhp.common.collection;
public class CollectorException extends Exception {
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/GetCSV.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/GetCSV.java
new file mode 100644
index 0000000000..44f4121ebe
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/GetCSV.java
@@ -0,0 +1,84 @@
+
+package eu.dnetlib.dhp.common.collection;
+
+import java.io.*;
+import java.net.URL;
+import java.net.URLConnection;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+import java.util.List;
+import java.util.Optional;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.opencsv.bean.CsvToBeanBuilder;
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+
+/**
+ * Converts a CSV stream into a JSON-lines file: every record is bound to a bean of the requested model
+ * class and written as one JSON object per line.
+ */
+public class GetCSV {
+
+	/** Separator applied when the caller does not pass one. */
+	private static final char DEFAULT_DELIMITER = ',';
+
+	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+	/**
+	 * Same as {@link #getCsv(FileSystem, BufferedReader, String, String, char)} with a comma as separator.
+	 */
+	public static void getCsv(FileSystem fileSystem, BufferedReader reader, String hdfsPath,
+		String modelClass) throws IOException, ClassNotFoundException {
+		getCsv(fileSystem, reader, hdfsPath, modelClass, DEFAULT_DELIMITER);
+	}
+
+	/**
+	 * Parses the CSV supplied by {@code reader} into beans of type {@code modelClass} and writes them to
+	 * {@code hdfsPath}, one UTF-8 encoded JSON object per line. An existing file at that path is replaced.
+	 * Closing {@code reader} is left to the caller.
+	 *
+	 * @param fileSystem the file system the output is written to
+	 * @param reader the CSV content to convert
+	 * @param hdfsPath the path of the output file
+	 * @param modelClass fully qualified name of the opencsv-annotated bean class each record is bound to
+	 * @param delimiter the column separator
+	 * @throws IOException if the output cannot be deleted, created or written
+	 * @throws ClassNotFoundException if {@code modelClass} cannot be loaded
+	 */
+	public static void getCsv(FileSystem fileSystem, BufferedReader reader, String hdfsPath,
+		String modelClass, char delimiter) throws IOException, ClassNotFoundException {
+
+		// Resolve the model and parse the whole input before touching the output, so that an unknown
+		// class or a malformed CSV does not destroy the file produced by a previous run.
+		final Class<?> beanType = Class.forName(modelClass);
+		final List<Object> beans = new CsvToBeanBuilder<Object>(reader)
+			.withType(beanType)
+			.withSeparator(delimiter)
+			.build()
+			.parse();
+
+		final Path hdfsWritePath = new Path(hdfsPath);
+		if (fileSystem.exists(hdfsWritePath)) {
+			fileSystem.delete(hdfsWritePath, false);
+		}
+
+		// Both resources are declared so the raw stream is closed even if wrapping it fails.
+		try (FSDataOutputStream out = fileSystem.create(hdfsWritePath);
+			BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, StandardCharsets.UTF_8))) {
+			for (Object bean : beans) {
+				writer.write(OBJECT_MAPPER.writeValueAsString(bean));
+				writer.newLine();
+			}
+		}
+	}
+
+}
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpClientParams.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpClientParams.java
similarity index 97%
rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpClientParams.java
rename to dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpClientParams.java
index ab0d5cc02a..6fcec00dd1 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpClientParams.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpClientParams.java
@@ -1,5 +1,5 @@
-package eu.dnetlib.dhp.collection;
+package eu.dnetlib.dhp.common.collection;
/**
* Bundles the http connection parameters driving the client behaviour.
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpConnector2.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java
similarity index 98%
rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpConnector2.java
rename to dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java
index 8493a3436a..724f5f0e1d 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpConnector2.java
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java
@@ -1,5 +1,5 @@
-package eu.dnetlib.dhp.collection;
+package eu.dnetlib.dhp.common.collection;
import static eu.dnetlib.dhp.utils.DHPUtils.*;
@@ -15,7 +15,7 @@ import org.apache.http.HttpHeaders;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
+import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
/**
* Migrated from https://svn.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-modular-collector-service/trunk/src/main/java/eu/dnetlib/data/collector/plugins/HttpConnector.java
diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java
new file mode 100644
index 0000000000..d24083e0d7
--- /dev/null
+++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/GetCSVTest.java
@@ -0,0 +1,246 @@
+
+package eu.dnetlib.dhp.common.collection;
+
+import java.io.*;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocalFileSystem;
+import org.apache.hadoop.fs.Path;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.Test;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import eu.dnetlib.dhp.common.collection.models.CSVProgramme;
+import eu.dnetlib.dhp.common.collection.models.CSVProject;
+import eu.dnetlib.dhp.common.collection.models.DOAJModel;
+import eu.dnetlib.dhp.common.collection.models.UnibiGoldModel;
+
+/**
+ * Runs {@link GetCSV} against live CSV files. Each test downloads its input over HTTP and asserts row
+ * counts and values pinned to one snapshot of that file, hence every test is {@code @Disabled}.
+ */
+public class GetCSVTest {
+
+	private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+	private static String workingDir;
+
+	private static LocalFileSystem fs;
+
+	@BeforeAll
+	public static void beforeAll() throws IOException {
+		workingDir = Files
+			.createTempDirectory(GetCSVTest.class.getSimpleName())
+			.toString();
+
+		fs = FileSystem.getLocal(new Configuration());
+	}
+
+	/** Opens the CSV published at {@code fileURL}; closing the reader is up to the caller. */
+	private static BufferedReader openRemoteCsv(String fileURL) throws CollectorException {
+		// NOTE(review): decoding as UTF-8 is an assumption about the remote files - confirm per source
+		return new BufferedReader(
+			new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL), StandardCharsets.UTF_8));
+	}
+
+	/** Opens a file produced by {@link GetCSV}, which writes UTF-8; closing the reader is up to the caller. */
+	private static BufferedReader openJsonLines(String path) throws IOException {
+		return new BufferedReader(new InputStreamReader(fs.open(new Path(path)), StandardCharsets.UTF_8));
+	}
+
+	@Disabled
+	@Test
+	void getProgrammeFileTest() throws Exception {
+
+		String fileURL = "https://cordis.europa.eu/data/reference/cordisref-h2020programmes.csv";
+		String output = workingDir + "/programme";
+
+		try (BufferedReader csv = openRemoteCsv(fileURL)) {
+			GetCSV.getCsv(fs, csv, output, CSVProgramme.class.getName(), ';');
+		}
+
+		int count = 0;
+		try (BufferedReader in = openJsonLines(output)) {
+			String line;
+			while ((line = in.readLine()) != null) {
+				CSVProgramme csvp = OBJECT_MAPPER.readValue(line, CSVProgramme.class);
+				if (count == 0) {
+					Assertions.assertEquals("H2020-EU.5.f.", csvp.getCode());
+					Assertions
+						.assertTrue(
+							csvp
+								.getTitle()
+								.startsWith(
+									"Develop the governance for the advancement of responsible research and innovation by all stakeholders"));
+					Assertions
+						.assertTrue(csvp.getTitle().endsWith("promote an ethics framework for research and innovation"));
+					Assertions.assertEquals("", csvp.getShortTitle());
+					Assertions.assertEquals("en", csvp.getLanguage());
+				}
+				if (count == 28) {
+					Assertions.assertEquals("H2020-EU.3.5.4.", csvp.getCode());
+					Assertions
+						.assertEquals(
+							"Grundlagen für den Übergang zu einer umweltfreundlichen Wirtschaft und Gesellschaft durch Öko-Innovation",
+							csvp.getTitle());
+					Assertions.assertEquals("A green economy and society through eco-innovation", csvp.getShortTitle());
+					Assertions.assertEquals("de", csvp.getLanguage());
+				}
+				if (count == 229) {
+					Assertions.assertEquals("H2020-EU.3.2.", csvp.getCode());
+					Assertions
+						.assertEquals(
+							"SOCIETAL CHALLENGES - Food security, sustainable agriculture and forestry, marine, maritime and inland water research, and the bioeconomy",
+							csvp.getTitle());
+					Assertions
+						.assertEquals(
+							"Food, agriculture, forestry, marine research and bioeconomy", csvp.getShortTitle());
+					Assertions.assertEquals("en", csvp.getLanguage());
+				}
+				Assertions.assertNotNull(csvp.getCode());
+				Assertions.assertTrue(csvp.getCode().startsWith("H2020"));
+				count += 1;
+			}
+		}
+
+		Assertions.assertEquals(767, count);
+	}
+
+	@Disabled
+	@Test
+	void getProjectFileTest() throws IOException, CollectorException, ClassNotFoundException {
+		String fileURL = "https://cordis.europa.eu/data/cordis-h2020projects.csv";
+		String output = workingDir + "/projects";
+
+		try (BufferedReader csv = openRemoteCsv(fileURL)) {
+			GetCSV.getCsv(fs, csv, output, CSVProject.class.getName(), ';');
+		}
+
+		int count = 0;
+		try (BufferedReader in = openJsonLines(output)) {
+			String line;
+			while ((line = in.readLine()) != null) {
+				CSVProject csvp = OBJECT_MAPPER.readValue(line, CSVProject.class);
+				if (count == 0) {
+					Assertions.assertEquals("771736", csvp.getId());
+					Assertions.assertEquals("H2020-EU.1.1.", csvp.getProgramme());
+					Assertions.assertEquals("ERC-2017-COG", csvp.getTopics());
+				}
+				if (count == 22882) {
+					Assertions.assertEquals("752903", csvp.getId());
+					Assertions.assertEquals("H2020-EU.1.3.2.", csvp.getProgramme());
+					Assertions.assertEquals("MSCA-IF-2016", csvp.getTopics());
+				}
+				// NOTE(review): unreachable while the file holds 34957 rows (asserted below) - index to be confirmed
+				if (count == 223023) {
+					Assertions.assertEquals("861952", csvp.getId());
+					Assertions.assertEquals("H2020-EU.4.e.", csvp.getProgramme());
+					Assertions.assertEquals("SGA-SEWP-COST-2019", csvp.getTopics());
+				}
+				Assertions.assertNotNull(csvp.getId());
+				Assertions.assertTrue(csvp.getProgramme().startsWith("H2020"));
+				count += 1;
+			}
+		}
+
+		Assertions.assertEquals(34957, count);
+	}
+
+	@Disabled
+	@Test
+	void getUnibiFileTest() throws CollectorException, IOException, ClassNotFoundException {
+
+		String fileURL = "https://pub.uni-bielefeld.de/download/2944717/2944718/issn_gold_oa_version_4.csv";
+		String output = workingDir + "/unibi";
+
+		try (BufferedReader csv = openRemoteCsv(fileURL)) {
+			GetCSV.getCsv(fs, csv, output, UnibiGoldModel.class.getName(), ',');
+		}
+
+		int count = 0;
+		try (BufferedReader in = openJsonLines(output)) {
+			String line;
+			while ((line = in.readLine()) != null) {
+				UnibiGoldModel unibi = OBJECT_MAPPER.readValue(line, UnibiGoldModel.class);
+				if (count == 0) {
+					Assertions.assertEquals("0001-625X", unibi.getIssn());
+					Assertions.assertEquals("0001-625X", unibi.getIssn_l());
+					Assertions.assertEquals("Acta Mycologica", unibi.getTitle());
+				}
+				if (count == 43158) {
+					Assertions.assertEquals("2088-6330", unibi.getIssn());
+					Assertions.assertEquals("2088-6330", unibi.getIssn_l());
+					Assertions.assertEquals("Religió: Jurnal Studi Agama-agama", unibi.getTitle());
+				}
+				if (count == 67027) {
+					Assertions.assertEquals("2658-7068", unibi.getIssn());
+					Assertions.assertEquals("2308-2488", unibi.getIssn_l());
+					Assertions.assertEquals("Istoriko-èkonomičeskie issledovaniâ.", unibi.getTitle());
+				}
+
+				count += 1;
+			}
+		}
+
+		Assertions.assertEquals(67028, count);
+	}
+
+	@Disabled
+	@Test
+	void getDoajFileTest() throws CollectorException, IOException, ClassNotFoundException {
+
+		String fileURL = "https://doaj.org/csv";
+		String output = workingDir + "/doaj";
+		// the intermediate copy lives in the per-run temp dir instead of a fixed /tmp path
+		File cleanedCsv = new File(workingDir, "DOAJ_1.csv");
+
+		// rewrite backslash-escaped quotes as plain quotes before parsing
+		try (BufferedReader in = openRemoteCsv(fileURL);
+			BufferedWriter writer = Files.newBufferedWriter(cleanedCsv.toPath(), StandardCharsets.UTF_8)) {
+			String line;
+			while ((line = in.readLine()) != null) {
+				writer.write(line.replace("\\\"", "\""));
+				writer.newLine();
+			}
+		}
+
+		try (BufferedReader csv = Files.newBufferedReader(cleanedCsv.toPath(), StandardCharsets.UTF_8)) {
+			GetCSV.getCsv(fs, csv, output, DOAJModel.class.getName(), ',');
+		}
+
+		int count = 0;
+		try (BufferedReader in = openJsonLines(output)) {
+			String line;
+			while ((line = in.readLine()) != null) {
+				DOAJModel doaj = OBJECT_MAPPER.readValue(line, DOAJModel.class);
+				if (count == 0) {
+					Assertions.assertEquals("0001-3765", doaj.getIssn());
+					Assertions.assertEquals("1678-2690", doaj.getEissn());
+					Assertions.assertEquals("Anais da Academia Brasileira de Ciências", doaj.getJournalTitle());
+				}
+				if (count == 7904) {
+					Assertions.assertEquals("", doaj.getIssn());
+					Assertions.assertEquals("2055-7159", doaj.getEissn());
+					Assertions.assertEquals("BJR|case reports", doaj.getJournalTitle());
+				}
+				if (count == 16707) {
+					Assertions.assertEquals("", doaj.getIssn());
+					Assertions.assertEquals("2788-6298", doaj.getEissn());
+					Assertions
+						.assertEquals("Teacher Education through Flexible Learning in Africa", doaj.getJournalTitle());
+				}
+
+				count += 1;
+			}
+		}
+
+		Assertions.assertEquals(16713, count);
+	}
+
+}
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVProgramme.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProgramme.java
similarity index 79%
rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVProgramme.java
rename to dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProgramme.java
index d486f01049..d17fcc75ef 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVProgramme.java
+++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProgramme.java
@@ -1,20 +1,32 @@
-package eu.dnetlib.dhp.actionmanager.project.utils;
+package eu.dnetlib.dhp.common.collection.models;
import java.io.Serializable;
+import com.opencsv.bean.CsvBindByName;
+import com.opencsv.bean.CsvIgnore;
+
/**
* The model for the programme csv file
*/
public class CSVProgramme implements Serializable {
- private String rcn;
+ @CsvBindByName(column = "code")
private String code;
+ @CsvBindByName(column = "title")
private String title;
+
+ @CsvBindByName(column = "shortTitle")
private String shortTitle;
+
+ @CsvBindByName(column = "language")
private String language;
+
+ @CsvIgnore
private String classification;
+
+ @CsvIgnore
private String classification_short;
public String getClassification_short() {
@@ -33,14 +45,6 @@ public class CSVProgramme implements Serializable {
this.classification = classification;
}
- public String getRcn() {
- return rcn;
- }
-
- public void setRcn(String rcn) {
- this.rcn = rcn;
- }
-
public String getCode() {
return code;
}
@@ -73,5 +77,4 @@ public class CSVProgramme implements Serializable {
this.language = language;
}
-//
}
diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java
new file mode 100644
index 0000000000..0d939ca0ec
--- /dev/null
+++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/CSVProject.java
@@ -0,0 +1,51 @@
+
+package eu.dnetlib.dhp.common.collection.models;
+
+import java.io.Serializable;
+
+import com.opencsv.bean.CsvBindByName;
+
+/**
+ * The model for the projects csv file.
+ * Only the id, programme and topics columns are bound, each matched by its header name; any other
+ * column of the file is not mapped to a field.
+ */
+public class CSVProject implements Serializable {
+
+	/** Value of the {@code id} column. */
+	@CsvBindByName(column = "id")
+	private String id;
+
+	/** Value of the {@code programme} column. */
+	@CsvBindByName(column = "programme")
+	private String programme;
+
+	/** Value of the {@code topics} column. */
+	@CsvBindByName(column = "topics")
+	private String topics;
+
+	public String getId() {
+		return id;
+	}
+
+	public void setId(String id) {
+		this.id = id;
+	}
+
+	public String getProgramme() {
+		return programme;
+	}
+
+	public void setProgramme(String programme) {
+		this.programme = programme;
+	}
+
+	public String getTopics() {
+		return topics;
+	}
+
+	public void setTopics(String topics) {
+		this.topics = topics;
+	}
+
+}
diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/DOAJModel.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/DOAJModel.java
new file mode 100644
index 0000000000..156ba36320
--- /dev/null
+++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/DOAJModel.java
@@ -0,0 +1,56 @@
+
+package eu.dnetlib.dhp.common.collection.models;
+
+import java.io.Serializable;
+
+import com.opencsv.bean.CsvBindByName;
+
+/**
+ * Bean that opencsv fills from a csv file by header name. Four columns are bound: journal title,
+ * print ISSN, online EISSN and review process; columns without a matching field are not mapped.
+ */
+public class DOAJModel implements Serializable {
+	@CsvBindByName(column = "Journal title")
+	private String journalTitle;
+
+	@CsvBindByName(column = "Journal ISSN (print version)")
+	private String issn;
+
+	@CsvBindByName(column = "Journal EISSN (online version)")
+	private String eissn;
+
+	@CsvBindByName(column = "Review process")
+	private String reviewProcess;
+
+	public String getJournalTitle() {
+		return journalTitle;
+	}
+
+	public void setJournalTitle(String journalTitle) {
+		this.journalTitle = journalTitle;
+	}
+
+	public String getIssn() {
+		return issn;
+	}
+
+	public void setIssn(String issn) {
+		this.issn = issn;
+	}
+
+	public String getEissn() {
+		return eissn;
+	}
+
+	public void setEissn(String eissn) {
+		this.eissn = eissn;
+	}
+
+	public String getReviewProcess() {
+		return reviewProcess;
+	}
+
+	public void setReviewProcess(String reviewProcess) {
+		this.reviewProcess = reviewProcess;
+	}
+}
diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/UnibiGoldModel.java b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/UnibiGoldModel.java
new file mode 100644
index 0000000000..ae47262bf2
--- /dev/null
+++ b/dhp-common/src/test/java/eu/dnetlib/dhp/common/collection/models/UnibiGoldModel.java
@@ -0,0 +1,54 @@
+
+package eu.dnetlib.dhp.common.collection.models;
+
+import java.io.Serializable;
+
+import com.opencsv.bean.CsvBindByName;
+
+/**
+ * Bean that opencsv fills from a csv file by header name. The ISSN, ISSN_L, TITLE and TITLE_SOURCE
+ * columns are bound; every property exposes a getter and a setter.
+ */
+public class UnibiGoldModel implements Serializable {
+	@CsvBindByName(column = "ISSN")
+	private String issn;
+	@CsvBindByName(column = "ISSN_L")
+	private String issn_l;
+	@CsvBindByName(column = "TITLE")
+	private String title;
+	@CsvBindByName(column = "TITLE_SOURCE")
+	private String title_source;
+
+	public String getIssn() {
+		return issn;
+	}
+
+	public void setIssn(String issn) {
+		this.issn = issn;
+	}
+
+	public String getIssn_l() {
+		return issn_l;
+	}
+
+	/** Mirrors the other properties: lets bean-style binders populate issn_l through a setter. */
+	public void setIssn_l(String issn_l) {
+		this.issn_l = issn_l;
+	}
+
+	public String getTitle() {
+		return title;
+	}
+
+	public void setTitle(String title) {
+		this.title = title;
+	}
+
+	public String getTitle_source() {
+		return title_source;
+	}
+
+	public void setTitle_source(String title_source) {
+		this.title_source = title_source;
+	}
+}
diff --git a/dhp-workflows/dhp-aggregation/pom.xml b/dhp-workflows/dhp-aggregation/pom.xml
index 2942870087..98e22d8a35 100644
--- a/dhp-workflows/dhp-aggregation/pom.xml
+++ b/dhp-workflows/dhp-aggregation/pom.xml
@@ -84,14 +84,6 @@
json
-
-
- org.apache.commons
- commons-csv
- 1.8
-
-
-
org.apache.poi
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java
index 40cf5ee532..686b0fc7f6 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java
@@ -20,7 +20,7 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
-import eu.dnetlib.dhp.actionmanager.project.utils.CSVProgramme;
+import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProgramme;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport;
import scala.Tuple2;
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java
index b1a381415c..8efc76a0ec 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java
@@ -18,7 +18,7 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
-import eu.dnetlib.dhp.actionmanager.project.utils.CSVProject;
+import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport;
import scala.Tuple2;
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java
index a4a0bf6a4a..cc1411b318 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java
@@ -4,7 +4,6 @@ package eu.dnetlib.dhp.actionmanager.project;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.util.Arrays;
-import java.util.HashMap;
import java.util.Objects;
import java.util.Optional;
@@ -22,9 +21,9 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
-import eu.dnetlib.dhp.actionmanager.project.utils.CSVProgramme;
-import eu.dnetlib.dhp.actionmanager.project.utils.CSVProject;
-import eu.dnetlib.dhp.actionmanager.project.utils.EXCELTopic;
+import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProgramme;
+import eu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject;
+import eu.dnetlib.dhp.actionmanager.project.utils.model.EXCELTopic;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.schema.action.AtomicAction;
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVParser.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVParser.java
deleted file mode 100644
index c53cd2127d..0000000000
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVParser.java
+++ /dev/null
@@ -1,47 +0,0 @@
-
-package eu.dnetlib.dhp.actionmanager.project.utils;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Set;
-
-import org.apache.commons.csv.CSVFormat;
-import org.apache.commons.csv.CSVRecord;
-import org.apache.commons.lang.reflect.FieldUtils;
-
-/**
- * Reads a generic csv and maps it into classes that mirror its schema
- */
-public class CSVParser {
-
- public List parse(String csvFile, String classForName)
- throws ClassNotFoundException, IOException, IllegalAccessException, InstantiationException {
- return parse(csvFile, classForName, ';');
- }
-
- public List parse(String csvFile, String classForName, char delimiter)
- throws ClassNotFoundException, IOException, IllegalAccessException, InstantiationException {
- final CSVFormat format = CSVFormat.EXCEL
- .withHeader()
- .withDelimiter(delimiter)
- .withQuote('"')
- .withTrim();
- List ret = new ArrayList<>();
- final org.apache.commons.csv.CSVParser parser = org.apache.commons.csv.CSVParser.parse(csvFile, format);
- final Set headers = parser.getHeaderMap().keySet();
- Class> clazz = Class.forName(classForName);
- for (CSVRecord csvRecord : parser.getRecords()) {
-
- @SuppressWarnings("unchecked")
- final R cc = (R) clazz.newInstance();
- for (String header : headers) {
- FieldUtils.writeField(cc, header, csvRecord.get(header), true);
-
- }
- ret.add(cc);
- }
-
- return ret;
- }
-}
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVProject.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVProject.java
deleted file mode 100644
index 268d5f28cc..0000000000
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVProject.java
+++ /dev/null
@@ -1,200 +0,0 @@
-
-package eu.dnetlib.dhp.actionmanager.project.utils;
-
-import java.io.Serializable;
-
-/**
- * the mmodel for the projects csv file
- */
-public class CSVProject implements Serializable {
- private String rcn;
- private String id;
- private String acronym;
- private String status;
- private String programme;
- private String topics;
- private String frameworkProgramme;
- private String title;
- private String startDate;
- private String endDate;
- private String projectUrl;
- private String objective;
- private String totalCost;
- private String ecMaxContribution;
- private String call;
- private String fundingScheme;
- private String coordinator;
- private String coordinatorCountry;
- private String participants;
- private String participantCountries;
- private String subjects;
-
- public String getRcn() {
- return rcn;
- }
-
- public void setRcn(String rcn) {
- this.rcn = rcn;
- }
-
- public String getId() {
- return id;
- }
-
- public void setId(String id) {
- this.id = id;
- }
-
- public String getAcronym() {
- return acronym;
- }
-
- public void setAcronym(String acronym) {
- this.acronym = acronym;
- }
-
- public String getStatus() {
- return status;
- }
-
- public void setStatus(String status) {
- this.status = status;
- }
-
- public String getProgramme() {
- return programme;
- }
-
- public void setProgramme(String programme) {
- this.programme = programme;
- }
-
- public String getTopics() {
- return topics;
- }
-
- public void setTopics(String topics) {
- this.topics = topics;
- }
-
- public String getFrameworkProgramme() {
- return frameworkProgramme;
- }
-
- public void setFrameworkProgramme(String frameworkProgramme) {
- this.frameworkProgramme = frameworkProgramme;
- }
-
- public String getTitle() {
- return title;
- }
-
- public void setTitle(String title) {
- this.title = title;
- }
-
- public String getStartDate() {
- return startDate;
- }
-
- public void setStartDate(String startDate) {
- this.startDate = startDate;
- }
-
- public String getEndDate() {
- return endDate;
- }
-
- public void setEndDate(String endDate) {
- this.endDate = endDate;
- }
-
- public String getProjectUrl() {
- return projectUrl;
- }
-
- public void setProjectUrl(String projectUrl) {
- this.projectUrl = projectUrl;
- }
-
- public String getObjective() {
- return objective;
- }
-
- public void setObjective(String objective) {
- this.objective = objective;
- }
-
- public String getTotalCost() {
- return totalCost;
- }
-
- public void setTotalCost(String totalCost) {
- this.totalCost = totalCost;
- }
-
- public String getEcMaxContribution() {
- return ecMaxContribution;
- }
-
- public void setEcMaxContribution(String ecMaxContribution) {
- this.ecMaxContribution = ecMaxContribution;
- }
-
- public String getCall() {
- return call;
- }
-
- public void setCall(String call) {
- this.call = call;
- }
-
- public String getFundingScheme() {
- return fundingScheme;
- }
-
- public void setFundingScheme(String fundingScheme) {
- this.fundingScheme = fundingScheme;
- }
-
- public String getCoordinator() {
- return coordinator;
- }
-
- public void setCoordinator(String coordinator) {
- this.coordinator = coordinator;
- }
-
- public String getCoordinatorCountry() {
- return coordinatorCountry;
- }
-
- public void setCoordinatorCountry(String coordinatorCountry) {
- this.coordinatorCountry = coordinatorCountry;
- }
-
- public String getParticipants() {
- return participants;
- }
-
- public void setParticipants(String participants) {
- this.participants = participants;
- }
-
- public String getParticipantCountries() {
- return participantCountries;
- }
-
- public void setParticipantCountries(String participantCountries) {
- this.participantCountries = participantCountries;
- }
-
- public String getSubjects() {
- return subjects;
- }
-
- public void setSubjects(String subjects) {
- this.subjects = subjects;
- }
-
-}
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java
index 5ce730692d..a520176f43 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java
@@ -17,6 +17,8 @@ import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+import eu.dnetlib.dhp.actionmanager.project.utils.model.EXCELTopic;
+
/**
* Reads a generic excel file and maps it into classes that mirror its schema
*/
@@ -30,7 +32,7 @@ public class EXCELParser {
XSSFSheet sheet = wb.getSheet(sheetName);
- if (sheetName == null) {
+ if (sheet == null) {
throw new IllegalArgumentException("Sheet name " + sheetName + " not present in current file");
}
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java
index 1ae775bec1..c967d4caee 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadCSV.java
@@ -1,36 +1,21 @@
package eu.dnetlib.dhp.actionmanager.project.utils;
-import java.io.BufferedWriter;
-import java.io.Closeable;
-import java.io.IOException;
-import java.io.OutputStreamWriter;
-import java.nio.charset.StandardCharsets;
+import java.io.*;
import java.util.Optional;
import org.apache.commons.io.IOUtils;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
-import eu.dnetlib.dhp.collection.HttpConnector2;
+import eu.dnetlib.dhp.common.collection.GetCSV;
+import eu.dnetlib.dhp.common.collection.HttpConnector2;
/**
* Applies the parsing of a csv file and writes the Serialization of it in hdfs
*/
-public class ReadCSV implements Closeable {
- private static final Log log = LogFactory.getLog(ReadCSV.class);
-
- private final BufferedWriter writer;
- private final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
- private final String csvFile;
- private final char delimiter;
+public class ReadCSV {
public static void main(final String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
@@ -50,57 +35,18 @@ public class ReadCSV implements Closeable {
char del = ';';
if (delimiter.isPresent())
del = delimiter.get().charAt(0);
- try (final ReadCSV readCSV = new ReadCSV(hdfsPath, hdfsNameNode, fileURL, del)) {
- log.info("Getting CSV file...");
- readCSV.execute(classForName);
- }
-
- }
-
- public void execute(final String classForName)
- throws IOException, ClassNotFoundException, IllegalAccessException, InstantiationException {
- CSVParser csvParser = new CSVParser();
- csvParser
- .parse(csvFile, classForName, delimiter)
- .stream()
- .forEach(this::write);
- }
-
- @Override
- public void close() throws IOException {
- writer.close();
- }
-
- public ReadCSV(
- final String hdfsPath,
- final String hdfsNameNode,
- final String fileURL,
- char delimiter)
- throws Exception {
Configuration conf = new Configuration();
conf.set("fs.defaultFS", hdfsNameNode);
- HttpConnector2 httpConnector = new HttpConnector2();
+
FileSystem fileSystem = FileSystem.get(conf);
- Path hdfsWritePath = new Path(hdfsPath);
+ BufferedReader reader = new BufferedReader(
+ new InputStreamReader(new HttpConnector2().getInputSourceAsStream(fileURL)));
- if (fileSystem.exists(hdfsWritePath)) {
- fileSystem.delete(hdfsWritePath, false);
- }
- final FSDataOutputStream fos = fileSystem.create(hdfsWritePath);
+ GetCSV.getCsv(fileSystem, reader, hdfsPath, classForName, del);
- this.writer = new BufferedWriter(new OutputStreamWriter(fos, StandardCharsets.UTF_8));
- this.csvFile = httpConnector.getInputSource(fileURL);
- this.delimiter = delimiter;
- }
+ reader.close();
- protected void write(final Object p) {
- try {
- writer.write(OBJECT_MAPPER.writeValueAsString(p));
- writer.newLine();
- } catch (final Exception e) {
- throw new RuntimeException(e);
- }
}
}
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java
index 359e46fc73..9e73cbc370 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java
@@ -16,8 +16,8 @@ import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
-import eu.dnetlib.dhp.collection.CollectorException;
-import eu.dnetlib.dhp.collection.HttpConnector2;
+import eu.dnetlib.dhp.common.collection.CollectorException;
+import eu.dnetlib.dhp.common.collection.HttpConnector2;
/**
* Applies the parsing of an excel file and writes the Serialization of it in hdfs
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProgramme.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProgramme.java
new file mode 100644
index 0000000000..df06fd6b4c
--- /dev/null
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProgramme.java
@@ -0,0 +1,80 @@
+
+package eu.dnetlib.dhp.actionmanager.project.utils.model;
+
+import java.io.Serializable;
+
+import com.opencsv.bean.CsvBindByName;
+import com.opencsv.bean.CsvIgnore;
+
+/**
+ * The model for the programme csv file
+ */
+public class CSVProgramme implements Serializable {
+
+ @CsvBindByName(column = "code")
+ private String code;
+
+ @CsvBindByName(column = "title")
+ private String title;
+
+ @CsvBindByName(column = "shortTitle")
+ private String shortTitle;
+
+ @CsvBindByName(column = "language")
+ private String language;
+
+ @CsvIgnore
+ private String classification;
+
+ @CsvIgnore
+ private String classification_short;
+
+ public String getClassification_short() {
+ return classification_short;
+ }
+
+ public void setClassification_short(String classification_short) {
+ this.classification_short = classification_short;
+ }
+
+ public String getClassification() {
+ return classification;
+ }
+
+ public void setClassification(String classification) {
+ this.classification = classification;
+ }
+
+ public String getCode() {
+ return code;
+ }
+
+ public void setCode(String code) {
+ this.code = code;
+ }
+
+ public String getTitle() {
+ return title;
+ }
+
+ public void setTitle(String title) {
+ this.title = title;
+ }
+
+ public String getShortTitle() {
+ return shortTitle;
+ }
+
+ public void setShortTitle(String shortTitle) {
+ this.shortTitle = shortTitle;
+ }
+
+ public String getLanguage() {
+ return language;
+ }
+
+ public void setLanguage(String language) {
+ this.language = language;
+ }
+
+}
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProject.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProject.java
new file mode 100644
index 0000000000..73cea0539f
--- /dev/null
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/CSVProject.java
@@ -0,0 +1,51 @@
+
+package eu.dnetlib.dhp.actionmanager.project.utils.model;
+
+import java.io.Serializable;
+
+import com.opencsv.bean.CsvBindByName;
+
+/**
+ * The model for the projects csv file
+ */
+public class CSVProject implements Serializable {
+
+ @CsvBindByName(column = "id")
+ private String id;
+
+ @CsvBindByName(column = "programme")
+ private String programme;
+
+ @CsvBindByName(column = "topics")
+ private String topics;
+
+
+ public String getId() {
+ return id;
+ }
+
+ public void setId(String id) {
+ this.id = id;
+ }
+
+
+
+ public String getProgramme() {
+ return programme;
+ }
+
+ public void setProgramme(String programme) {
+ this.programme = programme;
+ }
+
+ public String getTopics() {
+ return topics;
+ }
+
+ public void setTopics(String topics) {
+ this.topics = topics;
+ }
+
+
+
+}
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELTopic.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/EXCELTopic.java
similarity index 97%
rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELTopic.java
rename to dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/EXCELTopic.java
index 5607df1184..fa2e3422e8 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELTopic.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/model/EXCELTopic.java
@@ -1,5 +1,5 @@
-package eu.dnetlib.dhp.actionmanager.project.utils;
+package eu.dnetlib.dhp.actionmanager.project.utils.model;
import java.io.Serializable;
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/ReportingJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/ReportingJob.java
index 5491696736..dbf053a72c 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/ReportingJob.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/ReportingJob.java
@@ -6,6 +6,8 @@ import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
+import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
+
public abstract class ReportingJob {
/**
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java
index d0872da1da..2ea3f35ccb 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java
@@ -16,7 +16,6 @@ import org.apache.hadoop.io.compress.DeflateCodec;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
import eu.dnetlib.dhp.aggregation.common.ReporterCallback;
import eu.dnetlib.dhp.aggregation.common.ReportingJob;
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
@@ -24,6 +23,9 @@ import eu.dnetlib.dhp.collection.plugin.mongodb.MDStoreCollectorPlugin;
import eu.dnetlib.dhp.collection.plugin.mongodb.MongoDbDumpCollectorPlugin;
import eu.dnetlib.dhp.collection.plugin.oai.OaiCollectorPlugin;
import eu.dnetlib.dhp.collection.plugin.rest.RestCollectorPlugin;
+import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
+import eu.dnetlib.dhp.common.collection.CollectorException;
+import eu.dnetlib.dhp.common.collection.HttpClientParams;
import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion;
public class CollectorWorker extends ReportingJob {
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorkerApplication.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorkerApplication.java
index 545cbab0ca..708d2da65d 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorkerApplication.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorkerApplication.java
@@ -13,8 +13,10 @@ import org.apache.hadoop.fs.FileSystem;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
+import eu.dnetlib.dhp.common.collection.CollectorException;
+import eu.dnetlib.dhp.common.collection.HttpClientParams;
import eu.dnetlib.dhp.message.MessageSender;
import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion;
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java
index 457f634685..841d42fea8 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java
@@ -3,9 +3,9 @@ package eu.dnetlib.dhp.collection.plugin;
import java.util.stream.Stream;
-import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
import eu.dnetlib.dhp.collection.ApiDescriptor;
-import eu.dnetlib.dhp.collection.CollectorException;
+import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
+import eu.dnetlib.dhp.common.collection.CollectorException;
public interface CollectorPlugin {
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MDStoreCollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MDStoreCollectorPlugin.java
index 549c597204..ad28a72619 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MDStoreCollectorPlugin.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MDStoreCollectorPlugin.java
@@ -13,11 +13,11 @@ import org.slf4j.LoggerFactory;
import com.mongodb.client.MongoCollection;
-import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
import eu.dnetlib.dhp.collection.ApiDescriptor;
-import eu.dnetlib.dhp.collection.CollectorException;
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
import eu.dnetlib.dhp.common.MdstoreClient;
+import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
+import eu.dnetlib.dhp.common.collection.CollectorException;
public class MDStoreCollectorPlugin implements CollectorPlugin {
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MongoDbDumpCollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MongoDbDumpCollectorPlugin.java
index ec5bab448e..0364041410 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MongoDbDumpCollectorPlugin.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MongoDbDumpCollectorPlugin.java
@@ -12,10 +12,10 @@ import java.util.zip.GZIPInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
-import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
import eu.dnetlib.dhp.collection.ApiDescriptor;
-import eu.dnetlib.dhp.collection.CollectorException;
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
+import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
+import eu.dnetlib.dhp.common.collection.CollectorException;
import eu.dnetlib.dhp.utils.DHPUtils;
public class MongoDbDumpCollectorPlugin implements CollectorPlugin {
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java
index 9918e4abe2..878e286e0a 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiCollectorPlugin.java
@@ -13,11 +13,11 @@ import com.google.common.base.Splitter;
import com.google.common.collect.Iterators;
import com.google.common.collect.Lists;
-import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
import eu.dnetlib.dhp.collection.ApiDescriptor;
-import eu.dnetlib.dhp.collection.CollectorException;
-import eu.dnetlib.dhp.collection.HttpClientParams;
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
+import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
+import eu.dnetlib.dhp.common.collection.CollectorException;
+import eu.dnetlib.dhp.common.collection.HttpClientParams;
public class OaiCollectorPlugin implements CollectorPlugin {
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java
index 331dee6b47..3f767fd319 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIterator.java
@@ -19,10 +19,10 @@ import org.dom4j.io.XMLWriter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
-import eu.dnetlib.dhp.collection.CollectorException;
-import eu.dnetlib.dhp.collection.HttpConnector2;
import eu.dnetlib.dhp.collection.XmlCleaner;
+import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
+import eu.dnetlib.dhp.common.collection.CollectorException;
+import eu.dnetlib.dhp.common.collection.HttpConnector2;
public class OaiIterator implements Iterator {
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIteratorFactory.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIteratorFactory.java
index 48f6a94c86..1838223c23 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIteratorFactory.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/oai/OaiIteratorFactory.java
@@ -3,9 +3,9 @@ package eu.dnetlib.dhp.collection.plugin.oai;
import java.util.Iterator;
-import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
-import eu.dnetlib.dhp.collection.HttpClientParams;
-import eu.dnetlib.dhp.collection.HttpConnector2;
+import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
+import eu.dnetlib.dhp.common.collection.HttpClientParams;
+import eu.dnetlib.dhp.common.collection.HttpConnector2;
public class OaiIteratorFactory {
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPlugin.java
index be2bbcece1..997948687b 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPlugin.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestCollectorPlugin.java
@@ -9,11 +9,11 @@ import java.util.stream.StreamSupport;
import org.apache.commons.lang3.StringUtils;
-import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
import eu.dnetlib.dhp.collection.ApiDescriptor;
-import eu.dnetlib.dhp.collection.CollectorException;
-import eu.dnetlib.dhp.collection.HttpClientParams;
import eu.dnetlib.dhp.collection.plugin.CollectorPlugin;
+import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
+import eu.dnetlib.dhp.common.collection.CollectorException;
+import eu.dnetlib.dhp.common.collection.HttpClientParams;
/**
* TODO: delegate HTTP requests to the common HttpConnector2 implementation.
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java
index a90d259b44..64a041fd4a 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/rest/RestIterator.java
@@ -30,9 +30,9 @@ import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
-import eu.dnetlib.dhp.collection.CollectorException;
-import eu.dnetlib.dhp.collection.HttpClientParams;
import eu.dnetlib.dhp.collection.JsonUtils;
+import eu.dnetlib.dhp.common.collection.CollectorException;
+import eu.dnetlib.dhp.common.collection.HttpClientParams;
/**
* log.info(...) equal to log.trace(...) in the application-logs
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java
index a01703675d..4fe79bf769 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/TransformSparkJobNode.java
@@ -23,8 +23,8 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.aggregation.common.AggregationCounter;
-import eu.dnetlib.dhp.aggregation.common.AggregatorReport;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.common.aggregation.AggregatorReport;
import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup;
import eu.dnetlib.dhp.message.MessageSender;
import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion;
diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml
index e4f2715fb3..bd864a6aae 100644
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml
@@ -58,7 +58,7 @@
--hdfsNameNode${nameNode}
--fileURL${projectFileURL}
--hdfsPath${workingDir}/projects
- --classForNameeu.dnetlib.dhp.actionmanager.project.utils.CSVProject
+ --classForNameeu.dnetlib.dhp.actionmanager.project.utils.model.CSVProject
@@ -70,7 +70,7 @@
--hdfsNameNode${nameNode}
--fileURL${programmeFileURL}
--hdfsPath${workingDir}/programme
- --classForNameeu.dnetlib.dhp.actionmanager.project.utils.CSVProgramme
+ --classForNameeu.dnetlib.dhp.actionmanager.project.utils.model.CSVProgramme
@@ -83,7 +83,7 @@
--fileURL${topicFileURL}
--hdfsPath${workingDir}/topic
--sheetName${sheetName}
- --classForNameeu.dnetlib.dhp.actionmanager.project.utils.EXCELTopic
+ --classForNameeu.dnetlib.dhp.actionmanager.project.utils.model.EXCELTopic
diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/CSVParserTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/CSVParserTest.java
deleted file mode 100644
index dd7e1910f6..0000000000
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/CSVParserTest.java
+++ /dev/null
@@ -1,31 +0,0 @@
-
-package eu.dnetlib.dhp.actionmanager.project;
-
-import java.util.List;
-
-import org.apache.commons.io.IOUtils;
-import org.junit.jupiter.api.Assertions;
-import org.junit.jupiter.api.Test;
-
-import eu.dnetlib.dhp.actionmanager.project.utils.CSVParser;
-
-class CSVParserTest {
-
- @Test
- void readProgrammeTest() throws Exception {
-
- String programmecsv = IOUtils
- .toString(
- getClass()
- .getClassLoader()
- .getResourceAsStream("eu/dnetlib/dhp/actionmanager/project/programme.csv"));
-
- CSVParser csvParser = new CSVParser();
-
- List
-
- com.opencsv
- opencsv
-
+
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/GetCSV.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/GetCSV.java
index 9516cf6f76..d7d3698193 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/GetCSV.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/hostedbymap/GetCSV.java
@@ -2,27 +2,16 @@
package eu.dnetlib.dhp.oa.graph.hostedbymap;
import java.io.*;
-import java.net.URL;
-import java.net.URLConnection;
-import java.nio.charset.Charset;
-import java.nio.charset.StandardCharsets;
import java.util.Optional;
import org.apache.commons.io.IOUtils;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.opencsv.bean.CsvToBeanBuilder;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.common.collection.HttpConnector2;
public class GetCSV {
- private static final Log log = LogFactory.getLog(eu.dnetlib.dhp.oa.graph.hostedbymap.GetCSV.class);
public static void main(final String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
@@ -38,66 +27,45 @@ public class GetCSV {
final String hdfsPath = parser.get("workingPath");
final String hdfsNameNode = parser.get("hdfsNameNode");
final String classForName = parser.get("classForName");
+ final String delimiter = Optional
+ .ofNullable(parser.get("delimiter"))
+ .orElse(null);
final Boolean shouldReplace = Optional
.ofNullable((parser.get("replace")))
.map(Boolean::valueOf)
.orElse(false);
- URLConnection connection = new URL(fileURL).openConnection();
- connection
- .setRequestProperty(
- "User-Agent",
- "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11");
- connection.connect();
+ char del = ';';
+ if (delimiter != null) {
+ del = delimiter.charAt(0);
+ }
+
+ HttpConnector2 connector2 = new HttpConnector2();
BufferedReader in = new BufferedReader(
- new InputStreamReader(connection.getInputStream(), Charset.forName("UTF-8")));
+ new InputStreamReader(connector2.getInputSourceAsStream(fileURL), java.nio.charset.StandardCharsets.UTF_8)); // keep decoding the download as UTF-8 instead of the platform default
- if (shouldReplace) {
- PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("/tmp/DOAJ.csv")));
- String line = null;
- while ((line = in.readLine()) != null) {
- writer.println(line.replace("\\\"", "\""));
+ if (Boolean.TRUE.equals(shouldReplace)) {
+ try (PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("/tmp/replaced.csv")))) {
+ String line;
+ while ((line = in.readLine()) != null) {
+ writer.println(line.replace("\\\"", "\""));
+ }
}
- writer.close();
+
in.close();
- in = new BufferedReader(new FileReader("/tmp/DOAJ.csv"));
+ in = new BufferedReader(new FileReader("/tmp/replaced.csv"));
}
Configuration conf = new Configuration();
conf.set("fs.defaultFS", hdfsNameNode);
FileSystem fileSystem = FileSystem.get(conf);
- Path hdfsWritePath = new Path(hdfsPath);
- FSDataOutputStream fsDataOutputStream = null;
- if (fileSystem.exists(hdfsWritePath)) {
- fileSystem.delete(hdfsWritePath, false);
- }
- fsDataOutputStream = fileSystem.create(hdfsWritePath);
- BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fsDataOutputStream, StandardCharsets.UTF_8));
+ eu.dnetlib.dhp.common.collection.GetCSV.getCsv(fileSystem, in, hdfsPath, classForName, del);
- Class> clazz = Class.forName(classForName);
-
- ObjectMapper mapper = new ObjectMapper();
-
- new CsvToBeanBuilder(in)
- .withType(clazz)
- .withMultilineLimit(1)
- .build()
- .parse()
- .forEach(line -> {
- try {
- writer.write(mapper.writeValueAsString(line));
- writer.newLine();
- } catch (IOException e) {
- throw new RuntimeException(e);
- }
- });
-
- writer.close();
in.close();
- if (shouldReplace) {
- File f = new File("/tmp/DOAJ.csv");
+ if (Boolean.TRUE.equals(shouldReplace)) {
+ File f = new File("/tmp/replaced.csv"); // same temp file that the replace step writes and re-reads
f.delete();
}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml
index 71b20b356d..26b1d59931 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/workflow.xml
@@ -100,7 +100,7 @@
- eu.dnetlib.dhp.oa.graph.hostedbymap.GetCSV
+ eu.dnetlib.dhp.common.collection.GetCSV
--hdfsNameNode${nameNode}
--fileURL${unibiFileURL}
--workingPath${workingDir}/unibi_gold
@@ -112,7 +112,7 @@
- eu.dnetlib.dhp.oa.graph.hostedbymap.GetCSV
+ eu.dnetlib.dhp.common.collection.GetCSV
--hdfsNameNode${nameNode}
--fileURL${doajFileURL}
--workingPath${workingDir}/doaj
diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala
index 2ed76a72ac..5b00e9b6f1 100644
--- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala
+++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestPreprocess.scala
@@ -1,6 +1,5 @@
package eu.dnetlib.dhp.oa.graph.hostedbymap
-import eu.dnetlib.dhp.oa.graph.hostedbymap.{Aggregators, Constants, HostedByInfo, HostedByItemType, SparkProduceHostedByMap}
import eu.dnetlib.dhp.schema.oaf.Datasource
import org.apache.spark.SparkConf
import org.apache.spark.sql.{Dataset, Encoder, Encoders, SparkSession}
diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java
deleted file mode 100644
index 89259d8147..0000000000
--- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/hostedbymap/TestReadCSV.java
+++ /dev/null
@@ -1,112 +0,0 @@
-
-package eu.dnetlib.dhp.oa.graph.hostedbymap;
-
-import java.io.*;
-import java.net.URL;
-import java.net.URLConnection;
-import java.nio.charset.Charset;
-import java.util.List;
-
-import org.junit.jupiter.api.Assertions;
-import org.junit.jupiter.api.Disabled;
-import org.junit.jupiter.api.Test;
-
-import com.fasterxml.jackson.core.JsonProcessingException;
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.opencsv.bean.CsvToBeanBuilder;
-
-import eu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel;
-
-public class TestReadCSV {
-
- @Test
- public void testCSVUnibi() throws FileNotFoundException {
-
- final String sourcePath = getClass()
- .getResource("/eu/dnetlib/dhp/oa/graph/hostedbymap/unibiGold.csv")
- .getPath();
-
- List beans = new CsvToBeanBuilder(new FileReader(sourcePath))
- .withType(UnibiGoldModel.class)
- .build()
- .parse();
-
- Assertions.assertEquals(36, beans.size());
- Assertions.assertEquals(1, beans.stream().filter(e -> e.getIssn().equals("0001-625X")).count());
- Assertions
- .assertTrue(
- beans
- .stream()
- .anyMatch(e -> e.getIssn().equals("0001-625X") && e.getTitle().equals("Acta Mycologica")));
- Assertions.assertTrue(beans.stream().allMatch(e -> e.getIssn().equals(e.getIssn_l())));
-
- }
-
- @Disabled
- @Test
- public void testCSVUrlUnibi() throws IOException {
-
- URL csv = new URL("https://pub.uni-bielefeld.de/download/2944717/2944718/issn_gold_oa_version_4.csv");
-
- BufferedReader in = new BufferedReader(new InputStreamReader(csv.openStream()));
- ObjectMapper mapper = new ObjectMapper();
-
- new CsvToBeanBuilder(in)
- .withType(eu.dnetlib.dhp.oa.graph.hostedbymap.model.UnibiGoldModel.class)
- .build()
- .parse()
- .forEach(line ->
-
- {
- try {
- System.out.println(mapper.writeValueAsString(line));
- } catch (JsonProcessingException e) {
- e.printStackTrace();
- }
- }
-
- );
- }
-
- @Disabled
- @Test
- public void testCSVUrlDOAJ() throws IOException {
-
- URLConnection connection = new URL("https://doaj.org/csv").openConnection();
- connection
- .setRequestProperty(
- "User-Agent",
- "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11");
- connection.connect();
-
- BufferedReader in = new BufferedReader(
- new InputStreamReader(connection.getInputStream(), Charset.forName("UTF-8")));
- // BufferedReader in = new BufferedReader(new FileReader("/tmp/DOAJ.csv"));
- PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter("/tmp/DOAJ_1.csv")));
- String line = null;
- while ((line = in.readLine()) != null) {
- writer.println(line.replace("\\\"", "\""));
- }
- writer.close();
- in.close();
- in = new BufferedReader(new FileReader("/tmp/DOAJ_1.csv"));
- ObjectMapper mapper = new ObjectMapper();
-
- new CsvToBeanBuilder(in)
- .withType(eu.dnetlib.dhp.oa.graph.hostedbymap.model.DOAJModel.class)
- .withMultilineLimit(1)
- .build()
- .parse()
- .forEach(lline ->
-
- {
- try {
- System.out.println(mapper.writeValueAsString(lline));
- } catch (JsonProcessingException e) {
- e.printStackTrace();
- }
- }
-
- );
- }
-}
diff --git a/pom.xml b/pom.xml
index d31ffb88d5..a5e9f05477 100644
--- a/pom.xml
+++ b/pom.xml
@@ -524,7 +524,11 @@
opencsv
5.5
-
+
+ io.github.classgraph
+ classgraph
+ 4.8.71
+