[ENRICHMENT][BETA] Use of community API in enrichment process AND addition to tagging result for communities through projects #359
|
@ -7,7 +7,6 @@ import java.io.IOException;
|
|||
import java.io.Serializable;
|
||||
import java.util.*;
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.*;
|
||||
import org.apache.commons.cli.ParseException;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.hadoop.io.Text;
|
||||
|
@ -30,6 +29,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
|||
import eu.dnetlib.dhp.schema.action.AtomicAction;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
import eu.dnetlib.dhp.schema.oaf.utils.*;
|
||||
import eu.dnetlib.dhp.utils.DHPUtils;
|
||||
import scala.Tuple2;
|
||||
|
||||
|
|
|
@ -31,9 +31,7 @@ case class mappingAuthor(
|
|||
affiliation: Option[mappingAffiliation]
|
||||
) {}
|
||||
|
||||
case class funderInfo(id:String,uri:String, name:String,synonym:List[String] ) {}
|
||||
|
||||
|
||||
case class funderInfo(id: String, uri: String, name: String, synonym: List[String]) {}
|
||||
|
||||
case class mappingFunder(name: String, DOI: Option[String], award: Option[List[String]]) {}
|
||||
|
||||
|
@ -41,7 +39,9 @@ case object Crossref2Oaf {
|
|||
val logger: Logger = LoggerFactory.getLogger(Crossref2Oaf.getClass)
|
||||
|
||||
val irishFunder: List[funderInfo] = {
|
||||
val s = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/crossref/irish_funder.json")).mkString
|
||||
val s = Source
|
||||
.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/crossref/irish_funder.json"))
|
||||
.mkString
|
||||
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
|
||||
lazy val json: org.json4s.JValue = parse(s)
|
||||
json.extract[List[funderInfo]]
|
||||
|
@ -100,9 +100,11 @@ case object Crossref2Oaf {
|
|||
"report" -> "0017 Report"
|
||||
)
|
||||
|
||||
def getIrishId(doi:String):Option[String] = {
|
||||
val id =doi.split("/").last
|
||||
irishFunder.find(f => id.equalsIgnoreCase(f.id) || (f.synonym.nonEmpty && f.synonym.exists(s => s.equalsIgnoreCase(id)))).map(f => f.id)
|
||||
def getIrishId(doi: String): Option[String] = {
|
||||
val id = doi.split("/").last
|
||||
irishFunder
|
||||
.find(f => id.equalsIgnoreCase(f.id) || (f.synonym.nonEmpty && f.synonym.exists(s => s.equalsIgnoreCase(id))))
|
||||
.map(f => f.id)
|
||||
}
|
||||
|
||||
def mappingResult(result: Result, json: JValue, cobjCategory: String): Result = {
|
||||
|
|
|
@ -50,7 +50,7 @@ class CrossrefMappingTest {
|
|||
}
|
||||
}
|
||||
|
||||
def checkRelation(generatedOAF: List[Oaf]): Unit = {
|
||||
def checkRelation(generatedOAF: List[Oaf]): Unit = {
|
||||
|
||||
val rels: List[Relation] =
|
||||
generatedOAF.filter(p => p.isInstanceOf[Relation]).asInstanceOf[List[Relation]]
|
||||
|
|
|
@ -21,7 +21,7 @@ import com.google.gson.Gson;
|
|||
public class QueryCommunityAPI {
|
||||
private static final String baseUrl = "https://services.openaire.eu/openaire/";
|
||||
|
||||
private static String get(String geturl) throws IOException{
|
||||
private static String get(String geturl) throws IOException {
|
||||
URL url = new URL(geturl);
|
||||
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
|
||||
conn.setDoOutput(true);
|
||||
|
@ -44,7 +44,7 @@ public class QueryCommunityAPI {
|
|||
return get(baseUrl + "community/" + id);
|
||||
}
|
||||
|
||||
public static String communityDatasource(String id)throws IOException{
|
||||
public static String communityDatasource(String id) throws IOException {
|
||||
return get(baseUrl + "community/" + id + "/contentproviders");
|
||||
|
||||
}
|
||||
|
@ -53,8 +53,8 @@ public class QueryCommunityAPI {
|
|||
return get(baseUrl + "community/" + id + "/propagationOrganizations");
|
||||
}
|
||||
|
||||
public static String communityProjects(String id, String page, String size) throws IOException{
|
||||
return get(baseUrl + "community/" + id +"/projects/" + page + "/" + size);
|
||||
public static String communityProjects(String id, String page, String size) throws IOException {
|
||||
return get(baseUrl + "community/" + id + "/projects/" + page + "/" + size);
|
||||
}
|
||||
|
||||
@NotNull
|
||||
|
@ -74,5 +74,4 @@ public class QueryCommunityAPI {
|
|||
return body;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
@ -1,16 +1,6 @@
|
|||
|
||||
package eu.dnetlib.dhp.api;
|
||||
|
||||
import com.amazonaws.util.StringUtils;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.google.common.collect.Maps;
|
||||
import eu.dnetlib.dhp.api.model.*;
|
||||
import eu.dnetlib.dhp.bulktag.community.Community;
|
||||
import eu.dnetlib.dhp.bulktag.community.CommunityConfiguration;
|
||||
import eu.dnetlib.dhp.bulktag.community.Provider;
|
||||
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
|
||||
import eu.dnetlib.dhp.bulktag.criteria.VerbResolverFactory;
|
||||
|
||||
import javax.management.Query;
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
|
@ -19,118 +9,140 @@ import java.util.Map;
|
|||
import java.util.Objects;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import javax.management.Query;
|
||||
|
||||
import com.amazonaws.util.StringUtils;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.google.common.collect.Maps;
|
||||
|
||||
import eu.dnetlib.dhp.api.model.*;
|
||||
import eu.dnetlib.dhp.bulktag.community.Community;
|
||||
import eu.dnetlib.dhp.bulktag.community.CommunityConfiguration;
|
||||
import eu.dnetlib.dhp.bulktag.community.Provider;
|
||||
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
|
||||
import eu.dnetlib.dhp.bulktag.criteria.VerbResolverFactory;
|
||||
|
||||
/**
|
||||
* @author miriam.baglioni
|
||||
* @Date 09/10/23
|
||||
*/
|
||||
public class Utils implements Serializable {
|
||||
private static final ObjectMapper MAPPER = new ObjectMapper();
|
||||
private static final VerbResolver resolver = VerbResolverFactory.newInstance();
|
||||
private static final ObjectMapper MAPPER = new ObjectMapper();
|
||||
private static final VerbResolver resolver = VerbResolverFactory.newInstance();
|
||||
|
||||
public static CommunityConfiguration getCommunityConfiguration() throws IOException {
|
||||
final Map<String, Community> communities = Maps.newHashMap();
|
||||
List<Community> validCommunities = new ArrayList<>();
|
||||
getValidCommunities()
|
||||
.forEach(community -> {
|
||||
try {
|
||||
CommunityModel cm = MAPPER.readValue(QueryCommunityAPI.community(community.getId()), CommunityModel.class);
|
||||
validCommunities.add(getCommunity(cm));
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
});
|
||||
validCommunities.forEach(community ->{
|
||||
try {
|
||||
DatasourceList dl = MAPPER.readValue(QueryCommunityAPI.communityDatasource(community.getId()), DatasourceList.class);
|
||||
community.setProviders(dl.stream().map(d -> {
|
||||
public static CommunityConfiguration getCommunityConfiguration() throws IOException {
|
||||
final Map<String, Community> communities = Maps.newHashMap();
|
||||
List<Community> validCommunities = new ArrayList<>();
|
||||
getValidCommunities()
|
||||
.forEach(community -> {
|
||||
try {
|
||||
CommunityModel cm = MAPPER
|
||||
.readValue(QueryCommunityAPI.community(community.getId()), CommunityModel.class);
|
||||
validCommunities.add(getCommunity(cm));
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
});
|
||||
validCommunities.forEach(community -> {
|
||||
try {
|
||||
DatasourceList dl = MAPPER
|
||||
.readValue(QueryCommunityAPI.communityDatasource(community.getId()), DatasourceList.class);
|
||||
community.setProviders(dl.stream().map(d -> {
|
||||
// if(d.getEnabled() == null || Boolean.FALSE.equals(d.getEnabled()))
|
||||
// return null;
|
||||
Provider p = new Provider();
|
||||
p.setOpenaireId("10|" + d.getOpenaireId());
|
||||
p.setSelectionConstraints(d.getSelectioncriteria());
|
||||
if(p.getSelectionConstraints() != null)
|
||||
p.getSelectionConstraints().setSelection(resolver);
|
||||
return p;
|
||||
})
|
||||
.filter(Objects::nonNull)
|
||||
.collect(Collectors.toList()));
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
});
|
||||
Provider p = new Provider();
|
||||
p.setOpenaireId("10|" + d.getOpenaireId());
|
||||
p.setSelectionConstraints(d.getSelectioncriteria());
|
||||
if (p.getSelectionConstraints() != null)
|
||||
p.getSelectionConstraints().setSelection(resolver);
|
||||
return p;
|
||||
})
|
||||
.filter(Objects::nonNull)
|
||||
.collect(Collectors.toList()));
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
});
|
||||
|
||||
validCommunities.forEach(community ->{
|
||||
if(community.isValid())
|
||||
communities.put(community.getId(), community);
|
||||
});
|
||||
return new CommunityConfiguration(communities);
|
||||
}
|
||||
validCommunities.forEach(community -> {
|
||||
if (community.isValid())
|
||||
communities.put(community.getId(), community);
|
||||
});
|
||||
return new CommunityConfiguration(communities);
|
||||
}
|
||||
|
||||
private static Community getCommunity(CommunityModel cm){
|
||||
Community c = new Community();
|
||||
c.setId(cm.getId());
|
||||
c.setZenodoCommunities(cm.getOtherZenodoCommunities());
|
||||
if(!StringUtils.isNullOrEmpty(cm.getZenodoCommunity()))
|
||||
c.getZenodoCommunities().add(cm.getZenodoCommunity());
|
||||
c.setSubjects(cm.getSubjects());
|
||||
c.getSubjects().addAll(cm.getFos());
|
||||
c.getSubjects().addAll(cm.getSdg());
|
||||
c.setConstraints(cm.getAdvancedConstraints());
|
||||
if(c.getConstraints()!=null)
|
||||
c.getConstraints().setSelection(resolver);
|
||||
c.setRemoveConstraints(cm.getRemoveConstraints());
|
||||
if(c.getRemoveConstraints()!=null)
|
||||
c.getRemoveConstraints().setSelection(resolver);
|
||||
return c;
|
||||
}
|
||||
private static Community getCommunity(CommunityModel cm) {
|
||||
Community c = new Community();
|
||||
c.setId(cm.getId());
|
||||
c.setZenodoCommunities(cm.getOtherZenodoCommunities());
|
||||
if (!StringUtils.isNullOrEmpty(cm.getZenodoCommunity()))
|
||||
c.getZenodoCommunities().add(cm.getZenodoCommunity());
|
||||
c.setSubjects(cm.getSubjects());
|
||||
c.getSubjects().addAll(cm.getFos());
|
||||
c.getSubjects().addAll(cm.getSdg());
|
||||
c.setConstraints(cm.getAdvancedConstraints());
|
||||
if (c.getConstraints() != null)
|
||||
c.getConstraints().setSelection(resolver);
|
||||
c.setRemoveConstraints(cm.getRemoveConstraints());
|
||||
if (c.getRemoveConstraints() != null)
|
||||
c.getRemoveConstraints().setSelection(resolver);
|
||||
return c;
|
||||
}
|
||||
|
||||
public static List<CommunityModel> getValidCommunities() throws IOException {
|
||||
return MAPPER.readValue(QueryCommunityAPI.communities(), CommunitySummary.class)
|
||||
.stream()
|
||||
.filter(community -> !community.getStatus().equals("hidden") &&
|
||||
(community.getType().equals("ri") || community.getType().equals("community")))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
public static CommunityEntityMap getCommunityOrganization() throws IOException {
|
||||
CommunityEntityMap organizationMap = new CommunityEntityMap();
|
||||
getValidCommunities()
|
||||
.forEach(community -> {
|
||||
String id = community.getId();
|
||||
try {
|
||||
List<String> associatedOrgs = MAPPER.readValue(QueryCommunityAPI.communityPropagationOrganization(id), OrganizationList.class);
|
||||
if(associatedOrgs.size() >0){
|
||||
organizationMap.put(id, associatedOrgs);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
});
|
||||
return organizationMap;
|
||||
}
|
||||
public static List<CommunityModel> getValidCommunities() throws IOException {
|
||||
return MAPPER
|
||||
.readValue(QueryCommunityAPI.communities(), CommunitySummary.class)
|
||||
.stream()
|
||||
.filter(
|
||||
community -> !community.getStatus().equals("hidden") &&
|
||||
(community.getType().equals("ri") || community.getType().equals("community")))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
public static CommunityEntityMap getCommunityProjects()throws IOException{
|
||||
CommunityEntityMap projectMap = new CommunityEntityMap();
|
||||
getValidCommunities()
|
||||
.forEach(community ->{
|
||||
int page = -1;
|
||||
int size = 100;
|
||||
ContentModel cm = new ContentModel();
|
||||
List<String> projectList = new ArrayList<>();
|
||||
do{
|
||||
page ++;
|
||||
try {
|
||||
cm = MAPPER.readValue( QueryCommunityAPI.communityProjects(community.getId(), String.valueOf(page), String.valueOf(size)), ContentModel.class);
|
||||
if (cm.getContent().size() > 0){
|
||||
public static CommunityEntityMap getCommunityOrganization() throws IOException {
|
||||
CommunityEntityMap organizationMap = new CommunityEntityMap();
|
||||
getValidCommunities()
|
||||
.forEach(community -> {
|
||||
String id = community.getId();
|
||||
try {
|
||||
List<String> associatedOrgs = MAPPER
|
||||
.readValue(QueryCommunityAPI.communityPropagationOrganization(id), OrganizationList.class);
|
||||
if (associatedOrgs.size() > 0) {
|
||||
organizationMap.put(id, associatedOrgs);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
});
|
||||
return organizationMap;
|
||||
}
|
||||
|
||||
cm.getContent().forEach(p ->
|
||||
projectList.add ("40|" + p.getOpenaireId()));
|
||||
projectMap.put(community.getId(), projectList);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}while (!cm.getLast());
|
||||
});
|
||||
return projectMap;
|
||||
}
|
||||
public static CommunityEntityMap getCommunityProjects() throws IOException {
|
||||
CommunityEntityMap projectMap = new CommunityEntityMap();
|
||||
getValidCommunities()
|
||||
.forEach(community -> {
|
||||
int page = -1;
|
||||
int size = 100;
|
||||
ContentModel cm = new ContentModel();
|
||||
List<String> projectList = new ArrayList<>();
|
||||
do {
|
||||
page++;
|
||||
try {
|
||||
cm = MAPPER
|
||||
.readValue(
|
||||
QueryCommunityAPI
|
||||
.communityProjects(community.getId(), String.valueOf(page), String.valueOf(size)),
|
||||
ContentModel.class);
|
||||
if (cm.getContent().size() > 0) {
|
||||
|
||||
cm.getContent().forEach(p -> projectList.add("40|" + p.getOpenaireId()));
|
||||
projectMap.put(community.getId(), projectList);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
} while (!cm.getLast());
|
||||
});
|
||||
return projectMap;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,10 +1,11 @@
|
|||
|
||||
package eu.dnetlib.dhp.api.model;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonAutoDetect;
|
||||
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
|
||||
import com.google.gson.Gson;
|
||||
import eu.dnetlib.dhp.bulktag.community.SelectionConstraints;
|
||||
|
||||
import eu.dnetlib.dhp.bulktag.community.SelectionConstraints;
|
||||
|
||||
@JsonAutoDetect
|
||||
@JsonIgnoreProperties(ignoreUnknown = true)
|
||||
|
@ -30,10 +31,9 @@ public class CommunityContentprovider {
|
|||
this.openaireId = openaireId;
|
||||
}
|
||||
|
||||
|
||||
public SelectionConstraints getSelectioncriteria() {
|
||||
|
||||
return this.selectioncriteria;
|
||||
return this.selectioncriteria;
|
||||
}
|
||||
|
||||
public void setSelectioncriteria(SelectionConstraints selectioncriteria) {
|
||||
|
|
|
@ -5,8 +5,8 @@ import java.io.Serializable;
|
|||
import java.util.List;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
|
||||
import eu.dnetlib.dhp.bulktag.community.SelectionConstraints;
|
||||
|
||||
import eu.dnetlib.dhp.bulktag.community.SelectionConstraints;
|
||||
|
||||
/**
|
||||
* @author miriam.baglioni
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
|
||||
package eu.dnetlib.dhp.api.model;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
@ -8,9 +9,7 @@ import java.util.ArrayList;
|
|||
* @Date 06/10/23
|
||||
*/
|
||||
public class CommunitySummary extends ArrayList<CommunityModel> implements Serializable {
|
||||
public CommunitySummary() {
|
||||
super();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public CommunitySummary() {
|
||||
super();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,50 +1,51 @@
|
|||
package eu.dnetlib.dhp.api.model;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
|
||||
package eu.dnetlib.dhp.api.model;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
|
||||
|
||||
/**
|
||||
* @author miriam.baglioni
|
||||
* @Date 09/10/23
|
||||
*/
|
||||
@JsonIgnoreProperties(ignoreUnknown = true)
|
||||
public class ContentModel implements Serializable {
|
||||
private List<ProjectModel> content;
|
||||
private Integer totalPages;
|
||||
private Boolean last;
|
||||
private Integer number;
|
||||
private List<ProjectModel> content;
|
||||
private Integer totalPages;
|
||||
private Boolean last;
|
||||
private Integer number;
|
||||
|
||||
public List<ProjectModel> getContent() {
|
||||
return content;
|
||||
}
|
||||
public List<ProjectModel> getContent() {
|
||||
return content;
|
||||
}
|
||||
|
||||
public void setContent(List<ProjectModel> content) {
|
||||
this.content = content;
|
||||
}
|
||||
public void setContent(List<ProjectModel> content) {
|
||||
this.content = content;
|
||||
}
|
||||
|
||||
public Integer getTotalPages() {
|
||||
return totalPages;
|
||||
}
|
||||
public Integer getTotalPages() {
|
||||
return totalPages;
|
||||
}
|
||||
|
||||
public void setTotalPages(Integer totalPages) {
|
||||
this.totalPages = totalPages;
|
||||
}
|
||||
public void setTotalPages(Integer totalPages) {
|
||||
this.totalPages = totalPages;
|
||||
}
|
||||
|
||||
public Boolean getLast() {
|
||||
return last;
|
||||
}
|
||||
public Boolean getLast() {
|
||||
return last;
|
||||
}
|
||||
|
||||
public void setLast(Boolean last) {
|
||||
this.last = last;
|
||||
}
|
||||
public void setLast(Boolean last) {
|
||||
this.last = last;
|
||||
}
|
||||
|
||||
public Integer getNumber() {
|
||||
return number;
|
||||
}
|
||||
public Integer getNumber() {
|
||||
return number;
|
||||
}
|
||||
|
||||
public void setNumber(Integer number) {
|
||||
this.number = number;
|
||||
}
|
||||
public void setNumber(Integer number) {
|
||||
this.number = number;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,12 +1,13 @@
|
|||
|
||||
package eu.dnetlib.dhp.api.model;
|
||||
|
||||
import eu.dnetlib.dhp.api.model.CommunityContentprovider;
|
||||
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
|
||||
import eu.dnetlib.dhp.api.model.CommunityContentprovider;
|
||||
|
||||
public class DatasourceList extends ArrayList<CommunityContentprovider> implements Serializable {
|
||||
public DatasourceList(){
|
||||
public DatasourceList() {
|
||||
super();
|
||||
}
|
||||
}
|
|
@ -1,3 +1,4 @@
|
|||
|
||||
package eu.dnetlib.dhp.api.model;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
@ -9,7 +10,7 @@ import java.util.ArrayList;
|
|||
*/
|
||||
public class OrganizationList extends ArrayList<String> implements Serializable {
|
||||
|
||||
public OrganizationList(){
|
||||
super();
|
||||
}
|
||||
public OrganizationList() {
|
||||
super();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,9 +1,10 @@
|
|||
|
||||
package eu.dnetlib.dhp.api.model;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
|
||||
|
||||
/**
|
||||
* @author miriam.baglioni
|
||||
* @Date 09/10/23
|
||||
|
@ -11,13 +12,13 @@ import java.io.Serializable;
|
|||
@JsonIgnoreProperties(ignoreUnknown = true)
|
||||
public class ProjectModel implements Serializable {
|
||||
|
||||
private String openaireId;
|
||||
private String openaireId;
|
||||
|
||||
public String getOpenaireId() {
|
||||
return openaireId;
|
||||
}
|
||||
public String getOpenaireId() {
|
||||
return openaireId;
|
||||
}
|
||||
|
||||
public void setOpenaireId(String openaireId) {
|
||||
this.openaireId = openaireId;
|
||||
}
|
||||
public void setOpenaireId(String openaireId) {
|
||||
this.openaireId = openaireId;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -6,7 +6,6 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
|
|||
|
||||
import java.util.*;
|
||||
|
||||
import eu.dnetlib.dhp.api.Utils;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.spark.SparkConf;
|
||||
import org.apache.spark.api.java.function.FilterFunction;
|
||||
|
@ -21,6 +20,7 @@ import org.slf4j.LoggerFactory;
|
|||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.google.gson.Gson;
|
||||
|
||||
import eu.dnetlib.dhp.api.Utils;
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.bulktag.community.*;
|
||||
import eu.dnetlib.dhp.schema.oaf.Datasource;
|
||||
|
@ -87,7 +87,7 @@ public class SparkBulkTagJob {
|
|||
if (isTest) {
|
||||
cc = CommunityConfigurationFactory.newInstance(taggingConf);
|
||||
} else {
|
||||
cc = Utils.getCommunityConfiguration();//QueryInformationSystem.getCommunityConfiguration(parser.get("isLookUpUrl"));
|
||||
cc = Utils.getCommunityConfiguration();// QueryInformationSystem.getCommunityConfiguration(parser.get("isLookUpUrl"));
|
||||
}
|
||||
|
||||
runWithSparkSession(
|
||||
|
|
|
@ -8,7 +8,6 @@ import java.util.Optional;
|
|||
|
||||
import com.google.gson.Gson;
|
||||
|
||||
|
||||
/** Created by miriam on 01/08/2018. */
|
||||
public class Community implements Serializable {
|
||||
|
||||
|
|
|
@ -5,6 +5,7 @@ import java.io.StringReader;
|
|||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
|
|
@ -4,9 +4,10 @@ package eu.dnetlib.dhp.bulktag.community;
|
|||
import java.io.Serializable;
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
|
||||
import org.apache.htrace.fasterxml.jackson.annotation.JsonIgnore;
|
||||
|
||||
import eu.dnetlib.dhp.bulktag.criteria.Selection;
|
||||
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
|
||||
import org.apache.htrace.fasterxml.jackson.annotation.JsonIgnore;
|
||||
|
||||
public class Constraint implements Serializable {
|
||||
private String verb;
|
||||
|
@ -39,11 +40,12 @@ public class Constraint implements Serializable {
|
|||
public void setValue(String value) {
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
//@JsonIgnore
|
||||
//public void setSelection(Selection sel) {
|
||||
// public void setSelection(Selection sel) {
|
||||
// selection = sel;
|
||||
// }
|
||||
@JsonIgnore
|
||||
@JsonIgnore
|
||||
public void setSelection(VerbResolver resolver)
|
||||
throws InvocationTargetException, NoSuchMethodException, InstantiationException,
|
||||
IllegalAccessException {
|
||||
|
@ -54,5 +56,4 @@ public class Constraint implements Serializable {
|
|||
return selection.apply(metadata);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
@ -12,6 +12,7 @@ import com.google.gson.Gson;
|
|||
import com.google.gson.reflect.TypeToken;
|
||||
|
||||
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
|
||||
|
||||
@JsonAutoDetect
|
||||
public class SelectionConstraints implements Serializable {
|
||||
private List<Constraints> criteria;
|
||||
|
|
|
@ -6,8 +6,6 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
|
|||
|
||||
import java.util.*;
|
||||
|
||||
import eu.dnetlib.dhp.api.Utils;
|
||||
import eu.dnetlib.dhp.api.model.CommunityEntityMap;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.hadoop.io.compress.GzipCodec;
|
||||
import org.apache.spark.SparkConf;
|
||||
|
@ -20,6 +18,8 @@ import org.slf4j.LoggerFactory;
|
|||
|
||||
import com.google.gson.Gson;
|
||||
|
||||
import eu.dnetlib.dhp.api.Utils;
|
||||
import eu.dnetlib.dhp.api.model.CommunityEntityMap;
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import eu.dnetlib.dhp.schema.common.ModelConstants;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
|
|
|
@ -1572,36 +1572,36 @@ public class BulkTagJobTest {
|
|||
void newConfTest() throws Exception {
|
||||
final String pathMap = BulkTagJobTest.pathMap;
|
||||
SparkBulkTagJob
|
||||
.main(
|
||||
new String[] {
|
||||
"-isTest", Boolean.TRUE.toString(),
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-sourcePath",
|
||||
getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/no_updates").getPath(),
|
||||
"-taggingConf", taggingConf,
|
||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
|
||||
"-outputPath", workingDir.toString() + "/dataset",
|
||||
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
||||
"-pathMap", pathMap
|
||||
});
|
||||
.main(
|
||||
new String[] {
|
||||
"-isTest", Boolean.TRUE.toString(),
|
||||
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||
"-sourcePath",
|
||||
getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/no_updates").getPath(),
|
||||
"-taggingConf", taggingConf,
|
||||
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
|
||||
"-outputPath", workingDir.toString() + "/dataset",
|
||||
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
||||
"-pathMap", pathMap
|
||||
});
|
||||
|
||||
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||
|
||||
JavaRDD<Dataset> tmp = sc
|
||||
.textFile(workingDir.toString() + "/dataset")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class));
|
||||
.textFile(workingDir.toString() + "/dataset")
|
||||
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class));
|
||||
|
||||
Assertions.assertEquals(10, tmp.count());
|
||||
org.apache.spark.sql.Dataset<Dataset> verificationDataset = spark
|
||||
.createDataset(tmp.rdd(), Encoders.bean(Dataset.class));
|
||||
.createDataset(tmp.rdd(), Encoders.bean(Dataset.class));
|
||||
|
||||
verificationDataset.createOrReplaceTempView("dataset");
|
||||
|
||||
String query = "select id, MyT.id community "
|
||||
+ "from dataset "
|
||||
+ "lateral view explode(context) c as MyT "
|
||||
+ "lateral view explode(MyT.datainfo) d as MyD "
|
||||
+ "where MyD.inferenceprovenance = 'bulktagging'";
|
||||
+ "from dataset "
|
||||
+ "lateral view explode(context) c as MyT "
|
||||
+ "lateral view explode(MyT.datainfo) d as MyD "
|
||||
+ "where MyD.inferenceprovenance = 'bulktagging'";
|
||||
|
||||
Assertions.assertEquals(0, spark.sql(query).count());
|
||||
}
|
||||
|
|
|
@ -47,7 +47,7 @@ class CommunityConfigurationFactoryTest {
|
|||
sc.setVerb("not_contains");
|
||||
sc.setField("contributor");
|
||||
sc.setValue("DARIAH");
|
||||
sc.setSelection(resolver);//.getSelectionCriteria(sc.getVerb(), sc.getValue()));
|
||||
sc.setSelection(resolver);// .getSelectionCriteria(sc.getVerb(), sc.getValue()));
|
||||
String metadata = "This work has been partially supported by DARIAH-EU infrastructure";
|
||||
Assertions.assertFalse(sc.verifyCriteria(metadata));
|
||||
}
|
||||
|
|
|
@ -1,14 +1,8 @@
|
|||
|
||||
package eu.dnetlib.dhp.bulktag;
|
||||
|
||||
import eu.dnetlib.dhp.api.Utils;
|
||||
import eu.dnetlib.dhp.api.model.CommunityEntityMap;
|
||||
import eu.dnetlib.dhp.bulktag.community.Community;
|
||||
import eu.dnetlib.dhp.bulktag.community.CommunityConfiguration;
|
||||
import java.util.List;
|
||||
|
||||
import eu.dnetlib.dhp.api.model.CommunityModel;
|
||||
import eu.dnetlib.dhp.api.model.CommunitySummary;
|
||||
import eu.dnetlib.dhp.api.model.DatasourceList;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
|
@ -16,8 +10,13 @@ import com.fasterxml.jackson.core.JsonProcessingException;
|
|||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.api.QueryCommunityAPI;
|
||||
|
||||
import java.util.List;
|
||||
import eu.dnetlib.dhp.api.Utils;
|
||||
import eu.dnetlib.dhp.api.model.CommunityEntityMap;
|
||||
import eu.dnetlib.dhp.api.model.CommunityModel;
|
||||
import eu.dnetlib.dhp.api.model.CommunitySummary;
|
||||
import eu.dnetlib.dhp.api.model.DatasourceList;
|
||||
import eu.dnetlib.dhp.bulktag.community.Community;
|
||||
import eu.dnetlib.dhp.bulktag.community.CommunityConfiguration;
|
||||
|
||||
/**
|
||||
* @author miriam.baglioni
|
||||
|
@ -43,9 +42,12 @@ public class QueryCommunityAPITest {
|
|||
void community() throws Exception {
|
||||
String id = "dh-ch";
|
||||
String body = QueryCommunityAPI.community(id);
|
||||
System.out.println(new ObjectMapper().writeValueAsString(new ObjectMapper()
|
||||
.readValue(body, CommunityModel.class)))
|
||||
;
|
||||
System.out
|
||||
.println(
|
||||
new ObjectMapper()
|
||||
.writeValueAsString(
|
||||
new ObjectMapper()
|
||||
.readValue(body, CommunityModel.class)));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -53,14 +55,14 @@ public class QueryCommunityAPITest {
|
|||
String id = "dh-ch";
|
||||
String body = QueryCommunityAPI.communityDatasource(id);
|
||||
new ObjectMapper()
|
||||
.readValue(body, DatasourceList.class)
|
||||
.forEach(ds-> {
|
||||
try {
|
||||
System.out.println(new ObjectMapper().writeValueAsString(ds));
|
||||
} catch (JsonProcessingException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
});
|
||||
.readValue(body, DatasourceList.class)
|
||||
.forEach(ds -> {
|
||||
try {
|
||||
System.out.println(new ObjectMapper().writeValueAsString(ds));
|
||||
} catch (JsonProcessingException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
});
|
||||
;
|
||||
}
|
||||
|
||||
|
@ -68,24 +70,33 @@ public class QueryCommunityAPITest {
|
|||
void validCommunities() throws Exception {
|
||||
CommunityConfiguration cc = Utils.getCommunityConfiguration();
|
||||
System.out.println(cc.getCommunities().keySet());
|
||||
Community community =cc.getCommunities().get("aurora");
|
||||
Community community = cc.getCommunities().get("aurora");
|
||||
Assertions.assertEquals(0, community.getSubjects().size());
|
||||
Assertions.assertEquals(null, community.getConstraints());
|
||||
Assertions.assertEquals(null, community.getRemoveConstraints());
|
||||
Assertions.assertEquals(2, community.getZenodoCommunities().size());
|
||||
Assertions.assertTrue(community.getZenodoCommunities().stream().anyMatch(c -> c.equals("aurora-universities-network")));
|
||||
Assertions.assertTrue(community.getZenodoCommunities().stream().anyMatch(c -> c.equals("university-of-innsbruck")));
|
||||
Assertions
|
||||
.assertTrue(
|
||||
community.getZenodoCommunities().stream().anyMatch(c -> c.equals("aurora-universities-network")));
|
||||
Assertions
|
||||
.assertTrue(community.getZenodoCommunities().stream().anyMatch(c -> c.equals("university-of-innsbruck")));
|
||||
Assertions.assertEquals(35, community.getProviders().size());
|
||||
Assertions.assertEquals(35, community.getProviders().stream().filter(p->p.getSelectionConstraints()==null).count());
|
||||
Assertions
|
||||
.assertEquals(
|
||||
35, community.getProviders().stream().filter(p -> p.getSelectionConstraints() == null).count());
|
||||
}
|
||||
|
||||
@Test
|
||||
void getCommunityProjects() throws Exception {
|
||||
CommunityEntityMap projectMap = Utils.getCommunityProjects();
|
||||
Assertions.assertFalse(projectMap.containsKey("mes"));
|
||||
Assertions.assertEquals(33, projectMap.size());
|
||||
Assertions.assertTrue(projectMap.keySet().stream().allMatch(k -> projectMap.get(k).stream().allMatch(p -> p.startsWith("40|"))));
|
||||
Assertions.assertEquals(33, projectMap.size());
|
||||
Assertions
|
||||
.assertTrue(
|
||||
projectMap
|
||||
.keySet()
|
||||
.stream()
|
||||
.allMatch(k -> projectMap.get(k).stream().allMatch(p -> p.startsWith("40|"))));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue