Adding H2020 Classification, topic code and topic description to H2020 projects #46

Merged
claudio.atzori merged 59 commits from miriam.baglioni/dnet-hadoop:h2020classification into master 2020-10-05 14:14:39 +02:00
4 changed files with 140 additions and 13 deletions
Showing only changes of commit 1069cf243a - Show all commits

View File

@ -4,7 +4,7 @@ package eu.dnetlib.dhp.schema.oaf;
import java.io.Serializable; import java.io.Serializable;
import java.util.Objects; import java.util.Objects;
public class Programme implements Serializable { public class H2020Programme implements Serializable {
private String code; private String code;
private String description; private String description;
@ -31,8 +31,8 @@ public class Programme implements Serializable {
if (o == null || getClass() != o.getClass()) if (o == null || getClass() != o.getClass())
return false; return false;
Programme programme = (Programme) o; H2020Programme h2020Programme = (H2020Programme) o;
return Objects.equals(code, programme.code); return Objects.equals(code, h2020Programme.code);
} }
} }

View File

@ -0,0 +1,82 @@
package eu.dnetlib.dhp.schema.oaf;
import java.io.Serializable;
import java.util.Objects;
/**
 * Classification attached to an H2020 programme.
 *
 * <p>The {@code classification} string holds the full classification path, whose
 * components are separated by the literal token {@code " $ "} (for example
 * {@code "Excellent science $ Future and Emerging Technologies (FET) $ FET Open"}).
 * {@link #setLevels()} copies the first three components of that path into
 * {@code level1}, {@code level2} and {@code level3}.
 */
public class H2020classification implements Serializable {

    /**
     * Regular expression matching the literal {@code " $ "} separator.
     * {@link String#split(String)} interprets its argument as a regex, in which an
     * unescaped {@code $} is the end-of-input anchor, so the dollar sign must be escaped.
     */
    private static final String LEVEL_SEPARATOR_REGEX = " \\$ ";

    private H2020Programme h2020Programme;
    private String level1;
    private String level2;
    private String level3;
    private String classification;

    /** @return the programme this classification refers to, possibly {@code null} */
    public H2020Programme getH2020Programme() {
        return h2020Programme;
    }

    /** @param h2020Programme the programme this classification refers to */
    public void setH2020Programme(H2020Programme h2020Programme) {
        this.h2020Programme = h2020Programme;
    }

    /** @return the first level of the classification, possibly {@code null} */
    public String getLevel1() {
        return level1;
    }

    /** @param level1 the first level of the classification */
    public void setLevel1(String level1) {
        this.level1 = level1;
    }

    /** @return the second level of the classification, possibly {@code null} */
    public String getLevel2() {
        return level2;
    }

    /** @param level2 the second level of the classification */
    public void setLevel2(String level2) {
        this.level2 = level2;
    }

    /** @return the third level of the classification, possibly {@code null} */
    public String getLevel3() {
        return level3;
    }

    /** @param level3 the third level of the classification */
    public void setLevel3(String level3) {
        this.level3 = level3;
    }

    /** @return the full classification path, possibly {@code null} */
    public String getClassification() {
        return classification;
    }

    /** @param classification the full classification path, components separated by {@code " $ "} */
    public void setClassification(String classification) {
        this.classification = classification;
    }

    /**
     * Derives {@code level1}, {@code level2} and {@code level3} from the current
     * {@code classification} by splitting it on the literal {@code " $ "} separator.
     *
     * <p>Only the levels that are present in the classification are assigned; a level
     * with no matching component keeps its previous value, and components beyond the
     * third are ignored. When {@code classification} is {@code null} nothing is changed.
     */
    public void setLevels() {
        if (classification == null) {
            // Nothing to split: leave the levels as they are instead of throwing.
            return;
        }
        String[] parts = classification.split(LEVEL_SEPARATOR_REGEX);
        level1 = parts[0];
        if (parts.length > 1) {
            level2 = parts[1];
        }
        if (parts.length > 2) {
            level3 = parts[2];
        }
    }

    /**
     * Two classifications are equal when their three levels, their classification
     * string and their programme are all equal. Every field is compared null-safely.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        H2020classification other = (H2020classification) o;
        return Objects.equals(level1, other.level1)
            && Objects.equals(level2, other.level2)
            && Objects.equals(level3, other.level3)
            && Objects.equals(classification, other.classification)
            && Objects.equals(h2020Programme, other.h2020Programme);
    }

    /**
     * Hash code consistent with {@link #equals(Object)}.
     *
     * <p>The programme is deliberately left out: the hash then depends only on String
     * fields, so equal instances always hash alike regardless of how
     * {@code H2020Programme} implements {@code hashCode}.
     */
    @Override
    public int hashCode() {
        return Objects.hash(level1, level2, level3, classification);
    }
}

View File

@ -58,7 +58,26 @@ public class Project extends OafEntity implements Serializable {
private Float fundedamount; private Float fundedamount;
private List<Programme> programme; private String topic;
private List<H2020classification> h2020classification;
/**
 * Returns the topic stored on this project.
 *
 * @return the topic, or {@code null} when none has been set
 */
public String getTopic() {
    return this.topic;
}
/**
 * Stores the topic on this project, replacing any previous value.
 *
 * @param topic the topic to store
 */
public void setTopic(final String topic) {
    this.topic = topic;
}
/**
 * Returns the H2020 classifications stored on this project.
 * The internal list reference is returned as-is, without copying.
 *
 * @return the list of classifications, or {@code null} when none has been set
 */
public List<H2020classification> getH2020classification() {
    return this.h2020classification;
}
/**
 * Stores the H2020 classifications on this project, replacing any previous list.
 * The given list reference is kept as-is, without copying.
 *
 * @param h2020classification the list of classifications to store
 */
public void setH2020classification(final List<H2020classification> h2020classification) {
    this.h2020classification = h2020classification;
}
public Field<String> getWebsiteurl() { public Field<String> getWebsiteurl() {
return websiteurl; return websiteurl;
@ -268,13 +287,13 @@ public class Project extends OafEntity implements Serializable {
this.fundedamount = fundedamount; this.fundedamount = fundedamount;
} }
public List<Programme> getProgramme() { // public List<Programme> getProgramme() {
return programme; // return programme;
} // }
//
public void setProgramme(List<Programme> programme) { // public void setProgramme(List<Programme> programme) {
this.programme = programme; // this.programme = programme;
} // }
@Override @Override
public void mergeFrom(OafEntity e) { public void mergeFrom(OafEntity e) {
@ -331,7 +350,9 @@ public class Project extends OafEntity implements Serializable {
? p.getFundedamount() ? p.getFundedamount()
: fundedamount; : fundedamount;
programme = mergeLists(programme, p.getProgramme()); //programme = mergeLists(programme, p.getProgramme());
Review

please remove unused code lines

please remove unused code lines
h2020classification = mergeLists(h2020classification, p.getH2020classification());
mergeOAFDataInfo(e); mergeOAFDataInfo(e);
} }

View File

@ -5,10 +5,12 @@ import java.io.IOException;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import eu.dnetlib.dhp.actionmanager.project.csvutils.CSVProject;
import org.apache.commons.io.FileUtils; import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.ForeachFunction;
import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders; import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.SparkSession;
@ -88,7 +90,29 @@ public class PrepareProgrammeTest {
Dataset<CSVProgramme> verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(CSVProgramme.class)); Dataset<CSVProgramme> verificationDataset = spark.createDataset(tmp.rdd(), Encoders.bean(CSVProgramme.class));
Assertions.assertEquals(0, verificationDataset.filter("shortTitle =''").count()); Assertions.assertEquals(0, verificationDataset.filter("title =''").count());
Assertions.assertEquals(0, verificationDataset.filter("classification = ''").count());
Assertions.assertEquals("Societal challenges $ Smart, Green And Integrated Transport $ CLEANSKY2 $ IADP Fast Rotorcraft",
verificationDataset.filter("code = 'H2020-EU.3.4.5.3.'").select("classification").collectAsList()
.get(0).getString(0));
Assertions.assertEquals("Euratom $ Indirect actions $ European Fusion Development Agreement",
verificationDataset.filter("code = 'H2020-Euratom-1.9.'").select("classification").collectAsList()
.get(0).getString(0));
Assertions.assertEquals("Industrial leadership $ Leadership in enabling and industrial technologies $ Advanced manufacturing and processing $ New sustainable business models",
verificationDataset.filter("code = 'H2020-EU.2.1.5.4.'").select("classification").collectAsList()
.get(0).getString(0));
Assertions.assertEquals("Excellent science $ Future and Emerging Technologies (FET) $ FET Open",
verificationDataset.filter("code = 'H2020-EU.1.2.1.'").select("classification").collectAsList()
.get(0).getString(0));
} }
} }