From 6e84b3951f685061cc61e71c124cf4a7518ccee2 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 12 Aug 2021 17:57:41 +0200 Subject: [PATCH] GetCSV refactoring - moving classes to dhp-common that have dependency with GetCSV class (that was located in graph-mapper) --- .../common/aggregation}/AggregatorReport.java | 2 +- .../collection/CollectorException.java | 2 +- .../dnetlib/dhp/common/collection/GetCSV.java | 65 +++++++++++++++++++ .../common}/collection/HttpClientParams.java | 2 +- .../common}/collection/HttpConnector2.java | 4 +- 5 files changed, 70 insertions(+), 5 deletions(-) rename {dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common => dhp-common/src/main/java/eu/dnetlib/dhp/common/aggregation}/AggregatorReport.java (96%) rename {dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp => dhp-common/src/main/java/eu/dnetlib/dhp/common}/collection/CollectorException.java (93%) create mode 100644 dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/GetCSV.java rename {dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp => dhp-common/src/main/java/eu/dnetlib/dhp/common}/collection/HttpClientParams.java (97%) rename {dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp => dhp-common/src/main/java/eu/dnetlib/dhp/common}/collection/HttpConnector2.java (98%) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregatorReport.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/aggregation/AggregatorReport.java similarity index 96% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregatorReport.java rename to dhp-common/src/main/java/eu/dnetlib/dhp/common/aggregation/AggregatorReport.java index 8e46ab92b..c5926848e 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/aggregation/common/AggregatorReport.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/aggregation/AggregatorReport.java @@ -1,5 +1,5 @@ 
-package eu.dnetlib.dhp.aggregation.common; +package eu.dnetlib.dhp.common.aggregation; import java.io.Closeable; import java.io.IOException; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorException.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/CollectorException.java similarity index 93% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorException.java rename to dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/CollectorException.java index 144d297e6..5d94c2f89 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorException.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/CollectorException.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.collection; +package eu.dnetlib.dhp.common.collection; public class CollectorException extends Exception { 
diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/GetCSV.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/GetCSV.java
new file mode 100644
index 000000000..44f4121eb
--- /dev/null
+++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/GetCSV.java
@@ -0,0 +1,95 @@
+
+package eu.dnetlib.dhp.common.collection;
+
+import java.io.*;
+import java.net.URL;
+import java.net.URLConnection;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+import java.util.List;
+import java.util.Optional;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.opencsv.bean.CsvToBeanBuilder;
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+
+/**
+ * Reads CSV content, binds every row to a bean and writes the beans as JSON,
+ * one object per line, to a file on a Hadoop {@link FileSystem}.
+ */
+public class GetCSV {
+
+    /** Field separator applied when the caller does not pass one. */
+    private static final char DEFAULT_DELIMITER = ',';
+
+    /**
+     * Converts CSV content to JSON lines using {@code ','} as the field separator.
+     *
+     * @param fileSystem file system on which the output file is created
+     * @param reader source of the CSV content; this method does not close it explicitly
+     * @param hdfsPath path of the output file
+     * @param modelClass fully qualified name of the bean class each CSV row is bound to
+     * @throws IOException if the output file cannot be replaced, created or written
+     * @throws ClassNotFoundException if {@code modelClass} cannot be loaded
+     */
+    public static void getCsv(FileSystem fileSystem, BufferedReader reader, String hdfsPath,
+        String modelClass) throws IOException, ClassNotFoundException {
+        getCsv(fileSystem, reader, hdfsPath, modelClass, DEFAULT_DELIMITER);
+    }
+
+    /**
+     * Converts CSV content to JSON lines: every row is bound to an instance of
+     * {@code modelClass}, serialized with Jackson and written, UTF-8 encoded, on its own
+     * line of {@code hdfsPath}. A file already present at {@code hdfsPath} is deleted first.
+     *
+     * @param fileSystem file system on which the output file is created
+     * @param reader source of the CSV content; this method does not close it explicitly
+     * @param hdfsPath path of the output file
+     * @param modelClass fully qualified name of the bean class each CSV row is bound to
+     * @param delimiter character separating the fields of a CSV row
+     * @throws IOException if the output file cannot be replaced, created or written, or a
+     *         bean cannot be serialized
+     * @throws ClassNotFoundException if {@code modelClass} cannot be loaded
+     */
+    public static void getCsv(FileSystem fileSystem, BufferedReader reader, String hdfsPath,
+        String modelClass, char delimiter) throws IOException, ClassNotFoundException {
+
+        // Resolve the bean class before touching the file system, so that an unknown
+        // class name can no longer delete or truncate an existing output file.
+        Class<?> rowType = Class.forName(modelClass);
+
+        Path hdfsWritePath = new Path(hdfsPath);
+        if (fileSystem.exists(hdfsWritePath)) {
+            fileSystem.delete(hdfsWritePath, false);
+        }
+
+        ObjectMapper mapper = new ObjectMapper();
+
+        try (BufferedWriter writer = new BufferedWriter(
+            new OutputStreamWriter(fileSystem.create(hdfsWritePath), StandardCharsets.UTF_8))) {
+
+            List<Object> rows = new CsvToBeanBuilder<Object>(reader)
+                .withType(rowType)
+                .withSeparator(delimiter)
+                .build()
+                .parse();
+
+            // A plain loop (instead of a lambda) lets write and serialization failures
+            // propagate as the declared IOException rather than a bare RuntimeException.
+            for (Object row : rows) {
+                writer.write(mapper.writeValueAsString(row));
+                writer.newLine();
+            }
+        }
+    }
+
+}
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpClientParams.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpClientParams.java similarity index 97% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpClientParams.java rename to dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpClientParams.java index ab0d5cc02..6fcec00dd 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpClientParams.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpClientParams.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.collection; +package eu.dnetlib.dhp.common.collection; /** * Bundles the http connection parameters driving the client behaviour. 
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpConnector2.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java similarity index 98% rename from dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpConnector2.java rename to dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java index 8493a3436..724f5f0e1 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/HttpConnector2.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java @@ -1,5 +1,5 @@ -package eu.dnetlib.dhp.collection; +package eu.dnetlib.dhp.common.collection; import static eu.dnetlib.dhp.utils.DHPUtils.*; @@ -15,7 +15,7 @@ import org.apache.http.HttpHeaders; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import eu.dnetlib.dhp.aggregation.common.AggregatorReport; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; /** * Migrated from https://svn.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-modular-collector-service/trunk/src/main/java/eu/dnetlib/data/collector/plugins/HttpConnector.java