forked from D-Net/dnet-hadoop
applying sonarLint suggestions
This commit is contained in:
parent
2dd5449c13
commit
5edcc6832a
|
@ -144,22 +144,7 @@ public class PrepareProgramme {
|
||||||
JavaRDD<CSVProgramme> h2020Programmes = programme
|
JavaRDD<CSVProgramme> h2020Programmes = programme
|
||||||
.toJavaRDD()
|
.toJavaRDD()
|
||||||
.mapToPair(csvProgramme -> new Tuple2<>(csvProgramme.getCode(), csvProgramme))
|
.mapToPair(csvProgramme -> new Tuple2<>(csvProgramme.getCode(), csvProgramme))
|
||||||
.reduceByKey((a, b) -> {
|
.reduceByKey(PrepareProgramme::groupProgrammeByCode)
|
||||||
if (!a.getLanguage().equals("en")) {
|
|
||||||
if (b.getLanguage().equalsIgnoreCase("en")) {
|
|
||||||
a.setTitle(b.getTitle());
|
|
||||||
a.setLanguage(b.getLanguage());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (StringUtils.isEmpty(a.getShortTitle())) {
|
|
||||||
if (!StringUtils.isEmpty(b.getShortTitle())) {
|
|
||||||
a.setShortTitle(b.getShortTitle());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return a;
|
|
||||||
|
|
||||||
})
|
|
||||||
.map(p -> {
|
.map(p -> {
|
||||||
CSVProgramme csvProgramme = p._2();
|
CSVProgramme csvProgramme = p._2();
|
||||||
String programmeTitle = csvProgramme.getTitle().trim();
|
String programmeTitle = csvProgramme.getTitle().trim();
|
||||||
|
@ -176,20 +161,31 @@ public class PrepareProgramme {
|
||||||
return csvProgramme;
|
return csvProgramme;
|
||||||
});
|
});
|
||||||
|
|
||||||
// prepareClassification(h2020Programmes);
|
final JavaSparkContext jsc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext());
|
|
||||||
|
|
||||||
JavaRDD<CSVProgramme> rdd = jsc.parallelize(prepareClassification(h2020Programmes), 1);
|
JavaRDD<CSVProgramme> rdd = jsc.parallelize(prepareClassification(h2020Programmes), 1);
|
||||||
rdd
|
rdd
|
||||||
.map(csvProgramme -> {
|
.map(OBJECT_MAPPER::writeValueAsString)
|
||||||
String tmp = OBJECT_MAPPER.writeValueAsString(csvProgramme);
|
|
||||||
return tmp;
|
|
||||||
})
|
|
||||||
.saveAsTextFile(outputPath);
|
.saveAsTextFile(outputPath);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Merges two programme records into one by copying selected fields from {@code b} into {@code a}.
 * Passed as a method reference to {@code reduceByKey} on pairs keyed by
 * {@code csvProgramme.getCode()}, so both arguments are expected to share the same code.
 *
 * The title and language of {@code a} are replaced by those of {@code b} when {@code a} is not
 * tagged "en" and {@code b} is; the short title of {@code a} is filled from {@code b} when
 * {@code a} has none and {@code b} has one.
 *
 * NOTE(review): {@code a} is mutated in place and returned; no new instance is created.
 *
 * @param a the record that is updated and returned
 * @param b the record whose title, language and short title may be copied into {@code a}
 * @return {@code a}, after the possible updates
 */
private static CSVProgramme groupProgrammeByCode(CSVProgramme a, CSVProgramme b) {
|
||||||
|
// NOTE(review): this comparison is case-sensitive while the one on b below ignores case,
// so a language of "EN" on a is treated as non-English here — confirm this is intended.
// Also throws NullPointerException if getLanguage() returns null.
if (!a.getLanguage().equals("en")) {
|
||||||
|
// Only take b's title when b is the English variant (case-insensitive match).
if (b.getLanguage().equalsIgnoreCase("en")) {
|
||||||
|
a.setTitle(b.getTitle());
|
||||||
|
// Keep the language tag in sync with the title just copied from b.
a.setLanguage(b.getLanguage());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Fill a missing short title from b; an existing short title on a is never overwritten.
// presumably StringUtils.isEmpty treats null and "" alike — verify against the imported StringUtils.
if (StringUtils.isEmpty(a.getShortTitle())) {
|
||||||
|
if (!StringUtils.isEmpty(b.getShortTitle())) {
|
||||||
|
a.setShortTitle(b.getShortTitle());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// The (possibly updated) first argument is the merged result.
return a;
|
||||||
|
}
|
||||||
|
|
||||||
private static List<CSVProgramme> prepareClassification(JavaRDD<CSVProgramme> h2020Programmes) {
|
private static List<CSVProgramme> prepareClassification(JavaRDD<CSVProgramme> h2020Programmes) {
|
||||||
Object[] codedescription = h2020Programmes
|
Object[] codedescription = h2020Programmes
|
||||||
.map(
|
.map(
|
||||||
|
@ -240,10 +236,10 @@ public class PrepareProgramme {
|
||||||
if (!ent.contains("Euratom")) {
|
if (!ent.contains("Euratom")) {
|
||||||
|
|
||||||
String parent;
|
String parent;
|
||||||
String tmp_key = tmp[0] + ".";
|
String tmpKey = tmp[0] + ".";
|
||||||
for (int i = 1; i < tmp.length - 1; i++) {
|
for (int i = 1; i < tmp.length - 1; i++) {
|
||||||
tmp_key += tmp[i] + ".";
|
tmpKey += tmp[i] + ".";
|
||||||
parent = map.get(tmp_key)._1().toLowerCase().trim();
|
parent = map.get(tmpKey)._1().toLowerCase().trim();
|
||||||
if (parent.contains("|")) {
|
if (parent.contains("|")) {
|
||||||
parent = parent.substring(parent.lastIndexOf("|") + 1).trim();
|
parent = parent.substring(parent.lastIndexOf("|") + 1).trim();
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue