diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java index b2d3253d5..18362da68 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProgramme.java @@ -143,7 +143,6 @@ public class PrepareProgramme { JavaRDD h2020Programmes = programme .toJavaRDD() - .filter(p -> p.getFrameworkProgramme().trim().equalsIgnoreCase("H2020")) .mapToPair(csvProgramme -> new Tuple2<>(csvProgramme.getCode(), csvProgramme)) .reduceByKey((a, b) -> { if (!a.getLanguage().equals("en")) { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java index e5cae0ff7..07786d62d 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/PrepareProjects.java @@ -18,7 +18,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.actionmanager.project.utils.CSVProgramme; + import eu.dnetlib.dhp.actionmanager.project.utils.CSVProject; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.common.HdfsSupport; @@ -32,7 +32,7 @@ public class PrepareProjects { private static final Logger log = LoggerFactory.getLogger(PrepareProgramme.class); private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - private static final HashMap programmeMap = new HashMap<>(); + public static void main(String[] args) throws Exception { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java index a583b7bfa..fdc12c662 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/SparkAtomicActionJob.java @@ -120,7 +120,6 @@ public class SparkAtomicActionJob { .map((MapFunction, Project>) c -> { CSVProject csvProject = c._1(); - Optional ocsvProgramme = Optional.ofNullable(c._2()); return Optional .ofNullable(c._2()) @@ -135,9 +134,9 @@ public class SparkAtomicActionJob { H2020Programme pm = new H2020Programme(); H2020Classification h2020classification = new H2020Classification(); pm.setCode(csvProject.getProgramme()); - h2020classification.setClassification(ocsvProgramme.get().getClassification()); + h2020classification.setClassification(csvProgramme.getClassification()); h2020classification.setH2020Programme(pm); - setLevelsandProgramme(h2020classification, ocsvProgramme.get().getClassification_short()); + setLevelsandProgramme(h2020classification, csvProgramme.getClassification_short()); // setProgramme(h2020classification, ocsvProgramme.get().getClassification()); pp.setH2020classification(Arrays.asList(h2020classification)); @@ -145,10 +144,11 @@ public class SparkAtomicActionJob { }) .orElse(null); - }, Encoders.bean(Project.class)); + }, Encoders.bean(Project.class)) + .filter(Objects::nonNull); aaproject - .joinWith(topic, aaproject.col("h2020topiccode").equalTo(topic.col("code"))) + .joinWith(topic, aaproject.col("h2020topiccode").equalTo(topic.col("code")), "left") .map((MapFunction, Project>) p -> { Optional op = Optional.ofNullable(p._2()); Project rp = p._1(); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVProgramme.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVProgramme.java index f991a4297..d486f0104 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVProgramme.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/CSVProgramme.java @@ -7,14 +7,7 @@ import java.io.Serializable; * The model for the programme csv file */ public class CSVProgramme implements Serializable { - private String parentProgramme; - private String frameworkProgramme; - private String startDate; - private String endDate; - private String objective; - private String subjects; - private String legalBasis; - private String call; + private String rcn; private String code; @@ -80,67 +73,5 @@ public class CSVProgramme implements Serializable { this.language = language; } - public String getParentProgramme() { - return parentProgramme; - } - - public void setParentProgramme(String parentProgramme) { - this.parentProgramme = parentProgramme; - } - - public String getFrameworkProgramme() { - return frameworkProgramme; - } - - public void setFrameworkProgramme(String frameworkProgramme) { - this.frameworkProgramme = frameworkProgramme; - } - - public String getStartDate() { - return startDate; - } - - public void setStartDate(String startDate) { - this.startDate = startDate; - } - - public String getEndDate() { - return endDate; - } - - public void setEndDate(String endDate) { - this.endDate = endDate; - } - - public String getObjective() { - return objective; - } - - public void setObjective(String objective) { - this.objective = objective; - } - - public String getSubjects() { - return subjects; - } - - public void setSubjects(String subjects) { - this.subjects = subjects; - } - - public String getLegalBasis() { - return legalBasis; - } - - public void setLegalBasis(String legalBasis) { - this.legalBasis = legalBasis; - } - - public String getCall() { - return call; - } - - public void setCall(String call) { - this.call = call; - } +// } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java index 1a6ebb9e8..5f5b61d8b 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/project/utils/EXCELParser.java @@ -26,7 +26,6 @@ public class EXCELParser { throws ClassNotFoundException, IOException, IllegalAccessException, InstantiationException, InvalidFormatException { - // OPCPackage pkg = OPCPackage.open(httpConnector.getInputSourceAsStream(URL)); OPCPackage pkg = OPCPackage.open(file); XSSFWorkbook wb = new XSSFWorkbook(pkg); @@ -58,7 +57,6 @@ public class EXCELParser { for (int i = 0; i < headers.size(); i++) { Cell cell = row.getCell(i); - String value = dataFormatter.formatCellValue(cell); FieldUtils.writeField(cc, headers.get(i), dataFormatter.formatCellValue(cell), true); } diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml index 8ce581885..e4f2715fb 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/project/oozie_app/workflow.xml @@ -1,4 +1,4 @@ - + projectFileURL @@ -18,6 +18,10 @@ outputPath path where to store the action set + + sheetName + the name of the sheet to read + @@ -31,10 +35,23 @@ - + + + + + + + + + + + + + + eu.dnetlib.dhp.actionmanager.project.utils.ReadCSV @@ -43,7 +60,7 @@ --hdfsPath${workingDir}/projects --classForNameeu.dnetlib.dhp.actionmanager.project.utils.CSVProject - + @@ -55,7 +72,7 @@ --hdfsPath${workingDir}/programme --classForNameeu.dnetlib.dhp.actionmanager.project.utils.CSVProgramme - + @@ -68,7 +85,7 @@ --sheetName${sheetName} --classForNameeu.dnetlib.dhp.actionmanager.project.utils.EXCELTopic - + @@ -81,7 +98,7 @@ --postgresUser${postgresUser} --postgresPassword${postgresPassword} - + @@ -105,10 +122,15 @@ --programmePath${workingDir}/programme --outputPath${workingDir}/preparedProgramme - + + + + + + yarn @@ -130,7 +152,7 @@ --outputPath${workingDir}/preparedProjects --dbProjectPath${workingDir}/dbProjects - + diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java index 72ba48f41..e48dcd061 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/project/EXCELParserTest.java @@ -21,7 +21,7 @@ public class EXCELParserTest { private static Path workingDir; private HttpConnector httpConnector = new HttpConnector(); - private static final String URL = "http://cordis.europa.eu/data/reference/cordisref-H2020topics.xlsx"; + private static final String URL = "https://cordis.europa.eu/data/reference/cordisref-h2020topics.xlsx"; @BeforeAll public static void beforeAll() throws IOException { @@ -35,11 +35,12 @@ public class EXCELParserTest { EXCELParser excelParser = new EXCELParser(); - final String classForName = "eu.dnetlib.dhp.actionmanager.project.utils.ExcelTopic"; - final String sheetName = "Topics"; - List pl = excelParser.parse(httpConnector.getInputSourceAsStream(URL), classForName, sheetName); + List pl = excelParser + .parse( + httpConnector.getInputSourceAsStream(URL), "eu.dnetlib.dhp.actionmanager.project.utils.EXCELTopic", + "Topics"); - Assertions.assertEquals(3837, pl.size()); + Assertions.assertEquals(3878, pl.size()); } } diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/preparedProgramme_whole.json.gz b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/preparedProgramme_whole.json.gz index 01e804ff5..620e1abfb 100644 Binary files a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/preparedProgramme_whole.json.gz and b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/preparedProgramme_whole.json.gz differ diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/whole_programme.json.gz b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/whole_programme.json.gz index 8b1982dee..71f132ad1 100644 Binary files a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/whole_programme.json.gz and b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/project/whole_programme.json.gz differ