Fixed an issue caused by a change in the name of the downloaded file. Added the sheet name as a parameter, together with a check for the case in which the name changes.

This commit is contained in:
Miriam Baglioni 2021-05-20 14:53:53 +02:00
parent 02b80cf24f
commit dc0ad8d2e0
4 changed files with 17 additions and 6 deletions

View File

@ -22,7 +22,7 @@ import org.apache.poi.xssf.usermodel.XSSFWorkbook;
*/ */
public class EXCELParser { public class EXCELParser {
public <R> List<R> parse(InputStream file, String classForName) public <R> List<R> parse(InputStream file, String classForName, String sheetName)
throws ClassNotFoundException, IOException, IllegalAccessException, InstantiationException, throws ClassNotFoundException, IOException, IllegalAccessException, InstantiationException,
InvalidFormatException { InvalidFormatException {
@ -30,7 +30,11 @@ public class EXCELParser {
OPCPackage pkg = OPCPackage.open(file); OPCPackage pkg = OPCPackage.open(file);
XSSFWorkbook wb = new XSSFWorkbook(pkg); XSSFWorkbook wb = new XSSFWorkbook(pkg);
XSSFSheet sheet = wb.getSheet("cordisref-H2020topics"); XSSFSheet sheet = wb.getSheet(sheetName);
if(sheetName == null){
throw new RuntimeException("Sheet name " + sheetName + " not present in current file");
}
List<R> ret = new ArrayList<>(); List<R> ret = new ArrayList<>();
@ -49,7 +53,7 @@ public class EXCELParser {
headers.add(dataFormatter.formatCellValue(cell)); headers.add(dataFormatter.formatCellValue(cell));
} }
} else { } else {
Class<?> clazz = Class.forName("eu.dnetlib.dhp.actionmanager.project.utils.EXCELTopic"); Class<?> clazz = Class.forName(classForName);
final Object cc = clazz.newInstance(); final Object cc = clazz.newInstance();
for (int i = 0; i < headers.size(); i++) { for (int i = 0; i < headers.size(); i++) {

View File

@ -42,19 +42,20 @@ public class ReadExcel implements Closeable {
final String hdfsPath = parser.get("hdfsPath"); final String hdfsPath = parser.get("hdfsPath");
final String hdfsNameNode = parser.get("hdfsNameNode"); final String hdfsNameNode = parser.get("hdfsNameNode");
final String classForName = parser.get("classForName"); final String classForName = parser.get("classForName");
final String sheetName = parser.get("sheetName");
try (final ReadExcel readExcel = new ReadExcel(hdfsPath, hdfsNameNode, fileURL)) { try (final ReadExcel readExcel = new ReadExcel(hdfsPath, hdfsNameNode, fileURL)) {
log.info("Getting Excel file..."); log.info("Getting Excel file...");
readExcel.execute(classForName); readExcel.execute(classForName, sheetName);
} }
} }
public void execute(final String classForName) throws Exception { public void execute(final String classForName, final String sheetName) throws Exception {
EXCELParser excelParser = new EXCELParser(); EXCELParser excelParser = new EXCELParser();
excelParser excelParser
.parse(excelFile, classForName) .parse(excelFile, classForName, sheetName)
.stream() .stream()
.forEach(p -> write(p)); .forEach(p -> write(p));

View File

@ -65,6 +65,7 @@
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg> <arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
<arg>--fileURL</arg><arg>${topicFileURL}</arg> <arg>--fileURL</arg><arg>${topicFileURL}</arg>
<arg>--hdfsPath</arg><arg>${workingDir}/topic</arg> <arg>--hdfsPath</arg><arg>${workingDir}/topic</arg>
<arg>--sheetName</arg><arg>${sheetName}</arg>
<arg>--classForName</arg><arg>eu.dnetlib.dhp.actionmanager.project.utils.EXCELTopic</arg> <arg>--classForName</arg><arg>eu.dnetlib.dhp.actionmanager.project.utils.EXCELTopic</arg>
</java> </java>
<ok to="read_projects"/> <ok to="read_projects"/>

View File

@ -23,6 +23,11 @@
"paramLongName" : "classForName", "paramLongName" : "classForName",
"paramDescription" : "the name of the class to deserialize the csv to", "paramDescription" : "the name of the class to deserialize the csv to",
"paramRequired" : true "paramRequired" : true
}, {
"paramName": "sn",
"paramLongName" : "sheetName",
"paramDescription" : "the name of the sheet in case the file is excel",
"paramRequired" : false
} }