This commit is contained in:
Miriam Baglioni 2020-11-18 18:11:27 +01:00
parent c702f8e6a3
commit cb3cb8df04
2 changed files with 8 additions and 6 deletions

View File

@@ -73,13 +73,14 @@ public class PrepareResultsSparkJob implements Serializable {
result.createOrReplaceTempView("result");
String query = "select auth.name name, auth.surname surname, auth.fullname fullname, pIde.value orcid, id, cf.value collectedfrom "
+
String query = "select auth.name name, auth.surname surname, auth.fullname fullname, pIde.value orcid, id, " +
"collect_set(cf.value) as collectedfrom " +
"from result " +
"lateral view explode(author) a as auth " +
"lateral view explode(auth.pid)p as pIde " +
"lateral view explode (collectedfrom) c as cf " +
"where pIde.qualifier.classid = 'orcid'";
"where pIde.qualifier.classid = 'orcid' " +
"group by auth.name, auth.surname, auth.fullname, pIde.value, id";
spark
.sql(query)

View File

@@ -2,6 +2,7 @@
package eu.dnetlib.dhp.oa.graph.clean.authorpids;
import java.io.Serializable;
import java.util.List;
public class ResultInfo implements Serializable {
private String id;
@@ -9,13 +10,13 @@ public class ResultInfo implements Serializable {
private String surname;
private String fullname;
private String orcid;
private String collectedfrom;
private List<String> collectedfrom;
/**
 * Returns the current value of the {@code collectedfrom} field.
 *
 * NOTE(review): two signature lines appear below because this text is a diff
 * rendering whose +/- markers were stripped. The {@code String} signature
 * looks like the removed line and the {@code List<String>} signature the
 * added one; confirm against the actual file.
 *
 * @return the {@code collectedfrom} field, as stored
 */
public String getCollectedfrom() {
public List<String> getCollectedfrom() {
return collectedfrom;
}
/**
 * Stores the given value in the {@code collectedfrom} field.
 *
 * NOTE(review): two signature lines appear below because this text is a diff
 * rendering whose +/- markers were stripped. The {@code String} parameter
 * looks like the removed line and the {@code List<String>} parameter the
 * added one; confirm against the actual file.
 *
 * @param collectedfrom the value assigned to the field; it is stored as
 *                      passed, with no copy or validation
 */
public void setCollectedfrom(String collectedfrom) {
public void setCollectedfrom(List<String> collectedfrom) {
this.collectedfrom = collectedfrom;
}