From dc0ad8d2e09450b40fc14f3caafbda861d5b25fc Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Thu, 20 May 2021 14:53:53 +0200 Subject: [PATCH] Fixed an issue related to a change in the name of the downloaded file. Added the sheet name as a parameter, along with a check in case the name changes --- .../dhp/actionmanager/project/utils/EXCELParser.java | 10 +++++++--- .../dhp/actionmanager/project/utils/ReadExcel.java | 7 ++++--- .../dhp/actionmanager/project/oozie_app/workflow.xml | 1 + .../dnetlib/dhp/actionmanager/project/parameters.json | 5 +++++ 4 files changed, 17 insertions(+), 6 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java index 0f83499e4..cc18c6f54 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java @@ -22,7 +22,7 @@ import org.apache.poi.xssf.usermodel.XSSFWorkbook; */ public class EXCELParser { - public List parse(InputStream file, String classForName) + public List parse(InputStream file, String classForName, String sheetName) throws ClassNotFoundException, IOException, IllegalAccessException, InstantiationException, InvalidFormatException { @@ -30,7 +30,11 @@ public class EXCELParser { OPCPackage pkg = OPCPackage.open(file); XSSFWorkbook wb = new XSSFWorkbook(pkg); - XSSFSheet sheet = wb.getSheet("cordisref-H2020topics"); + XSSFSheet sheet = wb.getSheet(sheetName); + + if(sheetName == null){ + throw new RuntimeException("Sheet name " + sheetName + " not present in current file"); + } List ret = new ArrayList<>(); @@ -49,7 +53,7 @@ public class EXCELParser { headers.add(dataFormatter.formatCellValue(cell)); } } else { - Class clazz = Class.forName("eu.dnetlib.dhp.actionmanager.project.utils.EXCELTopic"); + 
Class clazz = Class.forName(classForName); final Object cc = clazz.newInstance(); for (int i = 0; i < headers.size(); i++) { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java index 23b58f2a0..7644ba04c 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/ReadExcel.java @@ -42,19 +42,20 @@ public class ReadExcel implements Closeable { final String hdfsPath = parser.get("hdfsPath"); final String hdfsNameNode = parser.get("hdfsNameNode"); final String classForName = parser.get("classForName"); + final String sheetName = parser.get("sheetName"); try (final ReadExcel readExcel = new ReadExcel(hdfsPath, hdfsNameNode, fileURL)) { log.info("Getting Excel file..."); - readExcel.execute(classForName); + readExcel.execute(classForName, sheetName); } } - public void execute(final String classForName) throws Exception { + public void execute(final String classForName, final String sheetName) throws Exception { EXCELParser excelParser = new EXCELParser(); excelParser - .parse(excelFile, classForName) + .parse(excelFile, classForName, sheetName) .stream() .forEach(p -> write(p)); diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml index c710c8b55..8ce581885 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml @@ -65,6 +65,7 @@ --hdfsNameNode${nameNode} --fileURL${topicFileURL} 
--hdfsPath${workingDir}/topic + --sheetName${sheetName} --classForNameeu.dnetlib.dhp.actionmanager.project.utils.EXCELTopic diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/parameters.json index dd3de70f6..b6c9c94b9 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/parameters.json +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/parameters.json @@ -23,6 +23,11 @@ "paramLongName" : "classForName", "paramDescription" : "the name of the class to deserialize the csv to", "paramRequired" : true +}, { + "paramName": "sn", + "paramLongName" : "sheetName", + "paramDescription" : "the name of the sheet in case the file is excel", + "paramRequired" : false }