forked from D-Net/dnet-hadoop
Now the Commission gives us the framework programme (FP7 - H2020), so use this information to filter out programmes not associated with H2020.
This commit is contained in:
parent
b66f930466
commit
609ff17cfc
|
@ -120,7 +120,7 @@ public class PrepareProgramme {
|
||||||
|
|
||||||
JavaRDD<CSVProgramme> h2020Programmes = programme
|
JavaRDD<CSVProgramme> h2020Programmes = programme
|
||||||
.toJavaRDD()
|
.toJavaRDD()
|
||||||
.filter(p -> !p.getCode().contains("FP7"))
|
.filter(p -> p.getFrameworkProgramme().trim().equalsIgnoreCase("H2020"))
|
||||||
.mapToPair(csvProgramme -> new Tuple2<>(csvProgramme.getCode(), csvProgramme))
|
.mapToPair(csvProgramme -> new Tuple2<>(csvProgramme.getCode(), csvProgramme))
|
||||||
.reduceByKey((a, b) -> {
|
.reduceByKey((a, b) -> {
|
||||||
if (!a.getLanguage().equals("en")) {
|
if (!a.getLanguage().equals("en")) {
|
||||||
|
|
Loading…
Reference in New Issue