Adding H2020 Classification, topic code and topic description to H2020 projects #46

Merged
claudio.atzori merged 59 commits from miriam.baglioni/dnet-hadoop:h2020classification into master 2020-10-05 14:14:39 +02:00
1 changed file with 16 additions and 18 deletions
Showing only changes of commit f8f5cfd5cc - Show all commits

View File

@@ -6,9 +6,7 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.util.*; import java.util.*;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.FlatMapFunction; import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Dataset;
@@ -20,8 +18,8 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProgramme; import eu.dnetlib.dhp.actionmanager.project.utils.CSVProgramme;
import eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProject; import eu.dnetlib.dhp.actionmanager.project.utils.CSVProject;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.HdfsSupport; import eu.dnetlib.dhp.common.HdfsSupport;
import scala.Tuple2; import scala.Tuple2;
@@ -97,25 +95,25 @@ public class PrepareProjects {
if (csvProject.isPresent()) { if (csvProject.isPresent()) {
String[] programme = csvProject.get().getProgramme().split(";"); String[] programme = csvProject.get().getProgramme().split(";");
String topic = csvProject.get().getTopics(); // String topic = csvProject.get().getTopics();
String topicdescription = Optional // String topicdescription = Optional
.ofNullable(csvProject.get().getTopics()) // .ofNullable(csvProject.get().getTopics())
.map(topics -> { // .map(topics -> {
if (topic.equalsIgnoreCase(value._1().getTopiccode())) { // if (topic.equalsIgnoreCase(value._1().getTopiccode())) {
return value._1().getTopicdescription(); // return value._1().getTopicdescription();
} // }
return null; // return null;
}) // })
.orElse(null); // .orElse(null);
Arrays Arrays
.stream(programme) .stream(programme)
.forEach(p -> { .forEach(p -> {
CSVProject proj = new CSVProject(); CSVProject proj = new CSVProject();
proj.setTopics(topic); // proj.setTopics(topic);
if (topicdescription != null) { // if (topicdescription != null) {
proj.setTopicdescription(topicdescription); // proj.setTopicdescription(topicdescription);
} // }
proj.setProgramme(p); proj.setProgramme(p);
proj.setId(csvProject.get().getId()); proj.setId(csvProject.get().getId());