forked from antonis.lempesis/dnet-hadoop
Fixed an issue caused by a change in the name of the downloaded file. Added the sheet name as a parameter, plus a check in case the sheet name changes.
This commit is contained in:
parent
02b80cf24f
commit
dc0ad8d2e0
|
@ -22,7 +22,7 @@ import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
|||
*/
|
||||
public class EXCELParser {
|
||||
|
||||
public <R> List<R> parse(InputStream file, String classForName)
|
||||
public <R> List<R> parse(InputStream file, String classForName, String sheetName)
|
||||
throws ClassNotFoundException, IOException, IllegalAccessException, InstantiationException,
|
||||
InvalidFormatException {
|
||||
|
||||
|
@ -30,7 +30,11 @@ public class EXCELParser {
|
|||
OPCPackage pkg = OPCPackage.open(file);
|
||||
XSSFWorkbook wb = new XSSFWorkbook(pkg);
|
||||
|
||||
XSSFSheet sheet = wb.getSheet("cordisref-H2020topics");
|
||||
XSSFSheet sheet = wb.getSheet(sheetName);
|
||||
|
||||
// wb.getSheet(...) returns null when the workbook has no sheet with the given name, so test the lookup result rather than the argument.
if (sheet == null) {
|
||||
throw new RuntimeException("Sheet name " + sheetName + " not present in current file");
|
||||
}
|
||||
|
||||
List<R> ret = new ArrayList<>();
|
||||
|
||||
|
@ -49,7 +53,7 @@ public class EXCELParser {
|
|||
headers.add(dataFormatter.formatCellValue(cell));
|
||||
}
|
||||
} else {
|
||||
Class<?> clazz = Class.forName("eu.dnetlib.dhp.actionmanager.project.utils.EXCELTopic");
|
||||
Class<?> clazz = Class.forName(classForName);
|
||||
final Object cc = clazz.newInstance();
|
||||
|
||||
for (int i = 0; i < headers.size(); i++) {
|
||||
|
|
|
@ -42,19 +42,20 @@ public class ReadExcel implements Closeable {
|
|||
final String hdfsPath = parser.get("hdfsPath");
|
||||
final String hdfsNameNode = parser.get("hdfsNameNode");
|
||||
final String classForName = parser.get("classForName");
|
||||
final String sheetName = parser.get("sheetName");
|
||||
|
||||
try (final ReadExcel readExcel = new ReadExcel(hdfsPath, hdfsNameNode, fileURL)) {
|
||||
|
||||
log.info("Getting Excel file...");
|
||||
readExcel.execute(classForName);
|
||||
readExcel.execute(classForName, sheetName);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
public void execute(final String classForName) throws Exception {
|
||||
public void execute(final String classForName, final String sheetName) throws Exception {
|
||||
EXCELParser excelParser = new EXCELParser();
|
||||
excelParser
|
||||
.parse(excelFile, classForName)
|
||||
.parse(excelFile, classForName, sheetName)
|
||||
.stream()
|
||||
.forEach(p -> write(p));
|
||||
|
||||
|
|
|
@ -65,6 +65,7 @@
|
|||
<arg>--hdfsNameNode</arg><arg>${nameNode}</arg>
|
||||
<arg>--fileURL</arg><arg>${topicFileURL}</arg>
|
||||
<arg>--hdfsPath</arg><arg>${workingDir}/topic</arg>
|
||||
<arg>--sheetName</arg><arg>${sheetName}</arg>
|
||||
<arg>--classForName</arg><arg>eu.dnetlib.dhp.actionmanager.project.utils.EXCELTopic</arg>
|
||||
</java>
|
||||
<ok to="read_projects"/>
|
||||
|
|
|
@ -23,6 +23,11 @@
|
|||
"paramLongName" : "classForName",
|
||||
"paramDescription" : "the name of the class to deserialize the csv to",
|
||||
"paramRequired" : true
|
||||
}, {
|
||||
"paramName": "sn",
|
||||
"paramLongName" : "sheetName",
|
||||
"paramDescription" : "the name of the sheet in case the file is excel",
|
||||
"paramRequired" : false
|
||||
}
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue