refactoring

This commit is contained in:
Miriam Baglioni 2023-10-09 14:52:17 +02:00
parent 8448b9ebfb
commit a3d01ccb24
21 changed files with 268 additions and 239 deletions

View File

@ -7,7 +7,6 @@ import java.io.IOException;
import java.io.Serializable; import java.io.Serializable;
import java.util.*; import java.util.*;
import eu.dnetlib.dhp.schema.oaf.utils.*;
import org.apache.commons.cli.ParseException; import org.apache.commons.cli.ParseException;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Text;
@ -30,6 +29,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.action.AtomicAction; import eu.dnetlib.dhp.schema.action.AtomicAction;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.utils.*;
import eu.dnetlib.dhp.utils.DHPUtils; import eu.dnetlib.dhp.utils.DHPUtils;
import scala.Tuple2; import scala.Tuple2;

View File

@ -31,9 +31,7 @@ case class mappingAuthor(
affiliation: Option[mappingAffiliation] affiliation: Option[mappingAffiliation]
) {} ) {}
case class funderInfo(id:String,uri:String, name:String,synonym:List[String] ) {} case class funderInfo(id: String, uri: String, name: String, synonym: List[String]) {}
case class mappingFunder(name: String, DOI: Option[String], award: Option[List[String]]) {} case class mappingFunder(name: String, DOI: Option[String], award: Option[List[String]]) {}
@ -41,7 +39,9 @@ case object Crossref2Oaf {
val logger: Logger = LoggerFactory.getLogger(Crossref2Oaf.getClass) val logger: Logger = LoggerFactory.getLogger(Crossref2Oaf.getClass)
val irishFunder: List[funderInfo] = { val irishFunder: List[funderInfo] = {
val s = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/crossref/irish_funder.json")).mkString val s = Source
.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/crossref/irish_funder.json"))
.mkString
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
lazy val json: org.json4s.JValue = parse(s) lazy val json: org.json4s.JValue = parse(s)
json.extract[List[funderInfo]] json.extract[List[funderInfo]]
@ -100,9 +100,11 @@ case object Crossref2Oaf {
"report" -> "0017 Report" "report" -> "0017 Report"
) )
def getIrishId(doi:String):Option[String] = { def getIrishId(doi: String): Option[String] = {
val id =doi.split("/").last val id = doi.split("/").last
irishFunder.find(f => id.equalsIgnoreCase(f.id) || (f.synonym.nonEmpty && f.synonym.exists(s => s.equalsIgnoreCase(id)))).map(f => f.id) irishFunder
.find(f => id.equalsIgnoreCase(f.id) || (f.synonym.nonEmpty && f.synonym.exists(s => s.equalsIgnoreCase(id))))
.map(f => f.id)
} }
def mappingResult(result: Result, json: JValue, cobjCategory: String): Result = { def mappingResult(result: Result, json: JValue, cobjCategory: String): Result = {

View File

@ -50,7 +50,7 @@ class CrossrefMappingTest {
} }
} }
def checkRelation(generatedOAF: List[Oaf]): Unit = { def checkRelation(generatedOAF: List[Oaf]): Unit = {
val rels: List[Relation] = val rels: List[Relation] =
generatedOAF.filter(p => p.isInstanceOf[Relation]).asInstanceOf[List[Relation]] generatedOAF.filter(p => p.isInstanceOf[Relation]).asInstanceOf[List[Relation]]

View File

@ -21,7 +21,7 @@ import com.google.gson.Gson;
public class QueryCommunityAPI { public class QueryCommunityAPI {
private static final String baseUrl = "https://services.openaire.eu/openaire/"; private static final String baseUrl = "https://services.openaire.eu/openaire/";
private static String get(String geturl) throws IOException{ private static String get(String geturl) throws IOException {
URL url = new URL(geturl); URL url = new URL(geturl);
HttpURLConnection conn = (HttpURLConnection) url.openConnection(); HttpURLConnection conn = (HttpURLConnection) url.openConnection();
conn.setDoOutput(true); conn.setDoOutput(true);
@ -44,7 +44,7 @@ public class QueryCommunityAPI {
return get(baseUrl + "community/" + id); return get(baseUrl + "community/" + id);
} }
public static String communityDatasource(String id)throws IOException{ public static String communityDatasource(String id) throws IOException {
return get(baseUrl + "community/" + id + "/contentproviders"); return get(baseUrl + "community/" + id + "/contentproviders");
} }
@ -53,8 +53,8 @@ public class QueryCommunityAPI {
return get(baseUrl + "community/" + id + "/propagationOrganizations"); return get(baseUrl + "community/" + id + "/propagationOrganizations");
} }
public static String communityProjects(String id, String page, String size) throws IOException{ public static String communityProjects(String id, String page, String size) throws IOException {
return get(baseUrl + "community/" + id +"/projects/" + page + "/" + size); return get(baseUrl + "community/" + id + "/projects/" + page + "/" + size);
} }
@NotNull @NotNull
@ -74,5 +74,4 @@ public class QueryCommunityAPI {
return body; return body;
} }
} }

View File

@ -1,16 +1,6 @@
package eu.dnetlib.dhp.api; package eu.dnetlib.dhp.api;
import com.amazonaws.util.StringUtils;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.Maps;
import eu.dnetlib.dhp.api.model.*;
import eu.dnetlib.dhp.bulktag.community.Community;
import eu.dnetlib.dhp.bulktag.community.CommunityConfiguration;
import eu.dnetlib.dhp.bulktag.community.Provider;
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
import eu.dnetlib.dhp.bulktag.criteria.VerbResolverFactory;
import javax.management.Query;
import java.io.IOException; import java.io.IOException;
import java.io.Serializable; import java.io.Serializable;
import java.util.ArrayList; import java.util.ArrayList;
@ -19,118 +9,140 @@ import java.util.Map;
import java.util.Objects; import java.util.Objects;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import javax.management.Query;
import com.amazonaws.util.StringUtils;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.Maps;
import eu.dnetlib.dhp.api.model.*;
import eu.dnetlib.dhp.bulktag.community.Community;
import eu.dnetlib.dhp.bulktag.community.CommunityConfiguration;
import eu.dnetlib.dhp.bulktag.community.Provider;
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
import eu.dnetlib.dhp.bulktag.criteria.VerbResolverFactory;
/** /**
* @author miriam.baglioni * @author miriam.baglioni
* @Date 09/10/23 * @Date 09/10/23
*/ */
public class Utils implements Serializable { public class Utils implements Serializable {
private static final ObjectMapper MAPPER = new ObjectMapper(); private static final ObjectMapper MAPPER = new ObjectMapper();
private static final VerbResolver resolver = VerbResolverFactory.newInstance(); private static final VerbResolver resolver = VerbResolverFactory.newInstance();
public static CommunityConfiguration getCommunityConfiguration() throws IOException { public static CommunityConfiguration getCommunityConfiguration() throws IOException {
final Map<String, Community> communities = Maps.newHashMap(); final Map<String, Community> communities = Maps.newHashMap();
List<Community> validCommunities = new ArrayList<>(); List<Community> validCommunities = new ArrayList<>();
getValidCommunities() getValidCommunities()
.forEach(community -> { .forEach(community -> {
try { try {
CommunityModel cm = MAPPER.readValue(QueryCommunityAPI.community(community.getId()), CommunityModel.class); CommunityModel cm = MAPPER
validCommunities.add(getCommunity(cm)); .readValue(QueryCommunityAPI.community(community.getId()), CommunityModel.class);
} catch (IOException e) { validCommunities.add(getCommunity(cm));
throw new RuntimeException(e); } catch (IOException e) {
} throw new RuntimeException(e);
}); }
validCommunities.forEach(community ->{ });
try { validCommunities.forEach(community -> {
DatasourceList dl = MAPPER.readValue(QueryCommunityAPI.communityDatasource(community.getId()), DatasourceList.class); try {
community.setProviders(dl.stream().map(d -> { DatasourceList dl = MAPPER
.readValue(QueryCommunityAPI.communityDatasource(community.getId()), DatasourceList.class);
community.setProviders(dl.stream().map(d -> {
// if(d.getEnabled() == null || Boolean.FALSE.equals(d.getEnabled())) // if(d.getEnabled() == null || Boolean.FALSE.equals(d.getEnabled()))
// return null; // return null;
Provider p = new Provider(); Provider p = new Provider();
p.setOpenaireId("10|" + d.getOpenaireId()); p.setOpenaireId("10|" + d.getOpenaireId());
p.setSelectionConstraints(d.getSelectioncriteria()); p.setSelectionConstraints(d.getSelectioncriteria());
if(p.getSelectionConstraints() != null) if (p.getSelectionConstraints() != null)
p.getSelectionConstraints().setSelection(resolver); p.getSelectionConstraints().setSelection(resolver);
return p; return p;
}) })
.filter(Objects::nonNull) .filter(Objects::nonNull)
.collect(Collectors.toList())); .collect(Collectors.toList()));
} catch (IOException e) { } catch (IOException e) {
throw new RuntimeException(e); throw new RuntimeException(e);
} }
}); });
validCommunities.forEach(community ->{ validCommunities.forEach(community -> {
if(community.isValid()) if (community.isValid())
communities.put(community.getId(), community); communities.put(community.getId(), community);
}); });
return new CommunityConfiguration(communities); return new CommunityConfiguration(communities);
} }
private static Community getCommunity(CommunityModel cm){ private static Community getCommunity(CommunityModel cm) {
Community c = new Community(); Community c = new Community();
c.setId(cm.getId()); c.setId(cm.getId());
c.setZenodoCommunities(cm.getOtherZenodoCommunities()); c.setZenodoCommunities(cm.getOtherZenodoCommunities());
if(!StringUtils.isNullOrEmpty(cm.getZenodoCommunity())) if (!StringUtils.isNullOrEmpty(cm.getZenodoCommunity()))
c.getZenodoCommunities().add(cm.getZenodoCommunity()); c.getZenodoCommunities().add(cm.getZenodoCommunity());
c.setSubjects(cm.getSubjects()); c.setSubjects(cm.getSubjects());
c.getSubjects().addAll(cm.getFos()); c.getSubjects().addAll(cm.getFos());
c.getSubjects().addAll(cm.getSdg()); c.getSubjects().addAll(cm.getSdg());
c.setConstraints(cm.getAdvancedConstraints()); c.setConstraints(cm.getAdvancedConstraints());
if(c.getConstraints()!=null) if (c.getConstraints() != null)
c.getConstraints().setSelection(resolver); c.getConstraints().setSelection(resolver);
c.setRemoveConstraints(cm.getRemoveConstraints()); c.setRemoveConstraints(cm.getRemoveConstraints());
if(c.getRemoveConstraints()!=null) if (c.getRemoveConstraints() != null)
c.getRemoveConstraints().setSelection(resolver); c.getRemoveConstraints().setSelection(resolver);
return c; return c;
} }
public static List<CommunityModel> getValidCommunities() throws IOException { public static List<CommunityModel> getValidCommunities() throws IOException {
return MAPPER.readValue(QueryCommunityAPI.communities(), CommunitySummary.class) return MAPPER
.stream() .readValue(QueryCommunityAPI.communities(), CommunitySummary.class)
.filter(community -> !community.getStatus().equals("hidden") && .stream()
(community.getType().equals("ri") || community.getType().equals("community"))) .filter(
.collect(Collectors.toList()); community -> !community.getStatus().equals("hidden") &&
} (community.getType().equals("ri") || community.getType().equals("community")))
public static CommunityEntityMap getCommunityOrganization() throws IOException { .collect(Collectors.toList());
CommunityEntityMap organizationMap = new CommunityEntityMap(); }
getValidCommunities()
.forEach(community -> {
String id = community.getId();
try {
List<String> associatedOrgs = MAPPER.readValue(QueryCommunityAPI.communityPropagationOrganization(id), OrganizationList.class);
if(associatedOrgs.size() >0){
organizationMap.put(id, associatedOrgs);
}
} catch (IOException e) {
throw new RuntimeException(e);
}
});
return organizationMap;
}
public static CommunityEntityMap getCommunityProjects()throws IOException{ public static CommunityEntityMap getCommunityOrganization() throws IOException {
CommunityEntityMap projectMap = new CommunityEntityMap(); CommunityEntityMap organizationMap = new CommunityEntityMap();
getValidCommunities() getValidCommunities()
.forEach(community ->{ .forEach(community -> {
int page = -1; String id = community.getId();
int size = 100; try {
ContentModel cm = new ContentModel(); List<String> associatedOrgs = MAPPER
List<String> projectList = new ArrayList<>(); .readValue(QueryCommunityAPI.communityPropagationOrganization(id), OrganizationList.class);
do{ if (associatedOrgs.size() > 0) {
page ++; organizationMap.put(id, associatedOrgs);
try { }
cm = MAPPER.readValue( QueryCommunityAPI.communityProjects(community.getId(), String.valueOf(page), String.valueOf(size)), ContentModel.class); } catch (IOException e) {
if (cm.getContent().size() > 0){ throw new RuntimeException(e);
}
});
return organizationMap;
}
cm.getContent().forEach(p -> public static CommunityEntityMap getCommunityProjects() throws IOException {
projectList.add ("40|" + p.getOpenaireId())); CommunityEntityMap projectMap = new CommunityEntityMap();
projectMap.put(community.getId(), projectList); getValidCommunities()
} .forEach(community -> {
} catch (IOException e) { int page = -1;
throw new RuntimeException(e); int size = 100;
} ContentModel cm = new ContentModel();
}while (!cm.getLast()); List<String> projectList = new ArrayList<>();
}); do {
return projectMap; page++;
} try {
cm = MAPPER
.readValue(
QueryCommunityAPI
.communityProjects(community.getId(), String.valueOf(page), String.valueOf(size)),
ContentModel.class);
if (cm.getContent().size() > 0) {
cm.getContent().forEach(p -> projectList.add("40|" + p.getOpenaireId()));
projectMap.put(community.getId(), projectList);
}
} catch (IOException e) {
throw new RuntimeException(e);
}
} while (!cm.getLast());
});
return projectMap;
}
} }

View File

@ -1,10 +1,11 @@
package eu.dnetlib.dhp.api.model; package eu.dnetlib.dhp.api.model;
import com.fasterxml.jackson.annotation.JsonAutoDetect; import com.fasterxml.jackson.annotation.JsonAutoDetect;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.google.gson.Gson; import com.google.gson.Gson;
import eu.dnetlib.dhp.bulktag.community.SelectionConstraints;
import eu.dnetlib.dhp.bulktag.community.SelectionConstraints;
@JsonAutoDetect @JsonAutoDetect
@JsonIgnoreProperties(ignoreUnknown = true) @JsonIgnoreProperties(ignoreUnknown = true)
@ -30,10 +31,9 @@ public class CommunityContentprovider {
this.openaireId = openaireId; this.openaireId = openaireId;
} }
public SelectionConstraints getSelectioncriteria() { public SelectionConstraints getSelectioncriteria() {
return this.selectioncriteria; return this.selectioncriteria;
} }
public void setSelectioncriteria(SelectionConstraints selectioncriteria) { public void setSelectioncriteria(SelectionConstraints selectioncriteria) {

View File

@ -5,8 +5,8 @@ import java.io.Serializable;
import java.util.List; import java.util.List;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import eu.dnetlib.dhp.bulktag.community.SelectionConstraints;
import eu.dnetlib.dhp.bulktag.community.SelectionConstraints;
/** /**
* @author miriam.baglioni * @author miriam.baglioni

View File

@ -1,3 +1,4 @@
package eu.dnetlib.dhp.api.model; package eu.dnetlib.dhp.api.model;
import java.io.Serializable; import java.io.Serializable;
@ -8,9 +9,7 @@ import java.util.ArrayList;
* @Date 06/10/23 * @Date 06/10/23
*/ */
public class CommunitySummary extends ArrayList<CommunityModel> implements Serializable { public class CommunitySummary extends ArrayList<CommunityModel> implements Serializable {
public CommunitySummary() { public CommunitySummary() {
super(); super();
} }
} }

View File

@ -1,50 +1,51 @@
package eu.dnetlib.dhp.api.model;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties; package eu.dnetlib.dhp.api.model;
import java.io.Serializable; import java.io.Serializable;
import java.util.List; import java.util.List;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
/** /**
* @author miriam.baglioni * @author miriam.baglioni
* @Date 09/10/23 * @Date 09/10/23
*/ */
@JsonIgnoreProperties(ignoreUnknown = true) @JsonIgnoreProperties(ignoreUnknown = true)
public class ContentModel implements Serializable { public class ContentModel implements Serializable {
private List<ProjectModel> content; private List<ProjectModel> content;
private Integer totalPages; private Integer totalPages;
private Boolean last; private Boolean last;
private Integer number; private Integer number;
public List<ProjectModel> getContent() { public List<ProjectModel> getContent() {
return content; return content;
} }
public void setContent(List<ProjectModel> content) { public void setContent(List<ProjectModel> content) {
this.content = content; this.content = content;
} }
public Integer getTotalPages() { public Integer getTotalPages() {
return totalPages; return totalPages;
} }
public void setTotalPages(Integer totalPages) { public void setTotalPages(Integer totalPages) {
this.totalPages = totalPages; this.totalPages = totalPages;
} }
public Boolean getLast() { public Boolean getLast() {
return last; return last;
} }
public void setLast(Boolean last) { public void setLast(Boolean last) {
this.last = last; this.last = last;
} }
public Integer getNumber() { public Integer getNumber() {
return number; return number;
} }
public void setNumber(Integer number) { public void setNumber(Integer number) {
this.number = number; this.number = number;
} }
} }

View File

@ -1,12 +1,13 @@
package eu.dnetlib.dhp.api.model; package eu.dnetlib.dhp.api.model;
import eu.dnetlib.dhp.api.model.CommunityContentprovider;
import java.io.Serializable; import java.io.Serializable;
import java.util.ArrayList; import java.util.ArrayList;
import eu.dnetlib.dhp.api.model.CommunityContentprovider;
public class DatasourceList extends ArrayList<CommunityContentprovider> implements Serializable { public class DatasourceList extends ArrayList<CommunityContentprovider> implements Serializable {
public DatasourceList(){ public DatasourceList() {
super(); super();
} }
} }

View File

@ -1,3 +1,4 @@
package eu.dnetlib.dhp.api.model; package eu.dnetlib.dhp.api.model;
import java.io.Serializable; import java.io.Serializable;
@ -9,7 +10,7 @@ import java.util.ArrayList;
*/ */
public class OrganizationList extends ArrayList<String> implements Serializable { public class OrganizationList extends ArrayList<String> implements Serializable {
public OrganizationList(){ public OrganizationList() {
super(); super();
} }
} }

View File

@ -1,9 +1,10 @@
package eu.dnetlib.dhp.api.model; package eu.dnetlib.dhp.api.model;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import java.io.Serializable; import java.io.Serializable;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
/** /**
* @author miriam.baglioni * @author miriam.baglioni
* @Date 09/10/23 * @Date 09/10/23
@ -11,13 +12,13 @@ import java.io.Serializable;
@JsonIgnoreProperties(ignoreUnknown = true) @JsonIgnoreProperties(ignoreUnknown = true)
public class ProjectModel implements Serializable { public class ProjectModel implements Serializable {
private String openaireId; private String openaireId;
public String getOpenaireId() { public String getOpenaireId() {
return openaireId; return openaireId;
} }
public void setOpenaireId(String openaireId) { public void setOpenaireId(String openaireId) {
this.openaireId = openaireId; this.openaireId = openaireId;
} }
} }

View File

@ -6,7 +6,6 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.util.*; import java.util.*;
import eu.dnetlib.dhp.api.Utils;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.api.java.function.FilterFunction;
@ -21,6 +20,7 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.gson.Gson; import com.google.gson.Gson;
import eu.dnetlib.dhp.api.Utils;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.bulktag.community.*; import eu.dnetlib.dhp.bulktag.community.*;
import eu.dnetlib.dhp.schema.oaf.Datasource; import eu.dnetlib.dhp.schema.oaf.Datasource;
@ -87,7 +87,7 @@ public class SparkBulkTagJob {
if (isTest) { if (isTest) {
cc = CommunityConfigurationFactory.newInstance(taggingConf); cc = CommunityConfigurationFactory.newInstance(taggingConf);
} else { } else {
cc = Utils.getCommunityConfiguration();//QueryInformationSystem.getCommunityConfiguration(parser.get("isLookUpUrl")); cc = Utils.getCommunityConfiguration();// QueryInformationSystem.getCommunityConfiguration(parser.get("isLookUpUrl"));
} }
runWithSparkSession( runWithSparkSession(

View File

@ -8,7 +8,6 @@ import java.util.Optional;
import com.google.gson.Gson; import com.google.gson.Gson;
/** Created by miriam on 01/08/2018. */ /** Created by miriam on 01/08/2018. */
public class Community implements Serializable { public class Community implements Serializable {

View File

@ -5,6 +5,7 @@ import java.io.StringReader;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;

View File

@ -4,9 +4,10 @@ package eu.dnetlib.dhp.bulktag.community;
import java.io.Serializable; import java.io.Serializable;
import java.lang.reflect.InvocationTargetException; import java.lang.reflect.InvocationTargetException;
import org.apache.htrace.fasterxml.jackson.annotation.JsonIgnore;
import eu.dnetlib.dhp.bulktag.criteria.Selection; import eu.dnetlib.dhp.bulktag.criteria.Selection;
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
import org.apache.htrace.fasterxml.jackson.annotation.JsonIgnore;
public class Constraint implements Serializable { public class Constraint implements Serializable {
private String verb; private String verb;
@ -39,11 +40,12 @@ public class Constraint implements Serializable {
public void setValue(String value) { public void setValue(String value) {
this.value = value; this.value = value;
} }
//@JsonIgnore //@JsonIgnore
//public void setSelection(Selection sel) { // public void setSelection(Selection sel) {
// selection = sel; // selection = sel;
// } // }
@JsonIgnore @JsonIgnore
public void setSelection(VerbResolver resolver) public void setSelection(VerbResolver resolver)
throws InvocationTargetException, NoSuchMethodException, InstantiationException, throws InvocationTargetException, NoSuchMethodException, InstantiationException,
IllegalAccessException { IllegalAccessException {
@ -54,5 +56,4 @@ public class Constraint implements Serializable {
return selection.apply(metadata); return selection.apply(metadata);
} }
} }

View File

@ -12,6 +12,7 @@ import com.google.gson.Gson;
import com.google.gson.reflect.TypeToken; import com.google.gson.reflect.TypeToken;
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
@JsonAutoDetect @JsonAutoDetect
public class SelectionConstraints implements Serializable { public class SelectionConstraints implements Serializable {
private List<Constraints> criteria; private List<Constraints> criteria;

View File

@ -6,8 +6,6 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
import java.util.*; import java.util.*;
import eu.dnetlib.dhp.api.Utils;
import eu.dnetlib.dhp.api.model.CommunityEntityMap;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.hadoop.io.compress.GzipCodec; import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
@ -20,6 +18,8 @@ import org.slf4j.LoggerFactory;
import com.google.gson.Gson; import com.google.gson.Gson;
import eu.dnetlib.dhp.api.Utils;
import eu.dnetlib.dhp.api.model.CommunityEntityMap;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Relation;

View File

@ -1572,36 +1572,36 @@ public class BulkTagJobTest {
void newConfTest() throws Exception { void newConfTest() throws Exception {
final String pathMap = BulkTagJobTest.pathMap; final String pathMap = BulkTagJobTest.pathMap;
SparkBulkTagJob SparkBulkTagJob
.main( .main(
new String[] { new String[] {
"-isTest", Boolean.TRUE.toString(), "-isTest", Boolean.TRUE.toString(),
"-isSparkSessionManaged", Boolean.FALSE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", "-sourcePath",
getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/no_updates").getPath(), getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/no_updates").getPath(),
"-taggingConf", taggingConf, "-taggingConf", taggingConf,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset", "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
"-outputPath", workingDir.toString() + "/dataset", "-outputPath", workingDir.toString() + "/dataset",
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL, "-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
"-pathMap", pathMap "-pathMap", pathMap
}); });
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<Dataset> tmp = sc JavaRDD<Dataset> tmp = sc
.textFile(workingDir.toString() + "/dataset") .textFile(workingDir.toString() + "/dataset")
.map(item -> OBJECT_MAPPER.readValue(item, Dataset.class)); .map(item -> OBJECT_MAPPER.readValue(item, Dataset.class));
Assertions.assertEquals(10, tmp.count()); Assertions.assertEquals(10, tmp.count());
org.apache.spark.sql.Dataset<Dataset> verificationDataset = spark org.apache.spark.sql.Dataset<Dataset> verificationDataset = spark
.createDataset(tmp.rdd(), Encoders.bean(Dataset.class)); .createDataset(tmp.rdd(), Encoders.bean(Dataset.class));
verificationDataset.createOrReplaceTempView("dataset"); verificationDataset.createOrReplaceTempView("dataset");
String query = "select id, MyT.id community " String query = "select id, MyT.id community "
+ "from dataset " + "from dataset "
+ "lateral view explode(context) c as MyT " + "lateral view explode(context) c as MyT "
+ "lateral view explode(MyT.datainfo) d as MyD " + "lateral view explode(MyT.datainfo) d as MyD "
+ "where MyD.inferenceprovenance = 'bulktagging'"; + "where MyD.inferenceprovenance = 'bulktagging'";
Assertions.assertEquals(0, spark.sql(query).count()); Assertions.assertEquals(0, spark.sql(query).count());
} }

View File

@ -47,7 +47,7 @@ class CommunityConfigurationFactoryTest {
sc.setVerb("not_contains"); sc.setVerb("not_contains");
sc.setField("contributor"); sc.setField("contributor");
sc.setValue("DARIAH"); sc.setValue("DARIAH");
sc.setSelection(resolver);//.getSelectionCriteria(sc.getVerb(), sc.getValue())); sc.setSelection(resolver);// .getSelectionCriteria(sc.getVerb(), sc.getValue()));
String metadata = "This work has been partially supported by DARIAH-EU infrastructure"; String metadata = "This work has been partially supported by DARIAH-EU infrastructure";
Assertions.assertFalse(sc.verifyCriteria(metadata)); Assertions.assertFalse(sc.verifyCriteria(metadata));
} }

View File

@ -1,14 +1,8 @@
package eu.dnetlib.dhp.bulktag; package eu.dnetlib.dhp.bulktag;
import eu.dnetlib.dhp.api.Utils; import java.util.List;
import eu.dnetlib.dhp.api.model.CommunityEntityMap;
import eu.dnetlib.dhp.bulktag.community.Community;
import eu.dnetlib.dhp.bulktag.community.CommunityConfiguration;
import eu.dnetlib.dhp.api.model.CommunityModel;
import eu.dnetlib.dhp.api.model.CommunitySummary;
import eu.dnetlib.dhp.api.model.DatasourceList;
import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
@ -16,8 +10,13 @@ import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.api.QueryCommunityAPI; import eu.dnetlib.dhp.api.QueryCommunityAPI;
import eu.dnetlib.dhp.api.Utils;
import java.util.List; import eu.dnetlib.dhp.api.model.CommunityEntityMap;
import eu.dnetlib.dhp.api.model.CommunityModel;
import eu.dnetlib.dhp.api.model.CommunitySummary;
import eu.dnetlib.dhp.api.model.DatasourceList;
import eu.dnetlib.dhp.bulktag.community.Community;
import eu.dnetlib.dhp.bulktag.community.CommunityConfiguration;
/** /**
* @author miriam.baglioni * @author miriam.baglioni
@ -43,9 +42,12 @@ public class QueryCommunityAPITest {
void community() throws Exception { void community() throws Exception {
String id = "dh-ch"; String id = "dh-ch";
String body = QueryCommunityAPI.community(id); String body = QueryCommunityAPI.community(id);
System.out.println(new ObjectMapper().writeValueAsString(new ObjectMapper() System.out
.readValue(body, CommunityModel.class))) .println(
; new ObjectMapper()
.writeValueAsString(
new ObjectMapper()
.readValue(body, CommunityModel.class)));
} }
@Test @Test
@ -53,14 +55,14 @@ public class QueryCommunityAPITest {
String id = "dh-ch"; String id = "dh-ch";
String body = QueryCommunityAPI.communityDatasource(id); String body = QueryCommunityAPI.communityDatasource(id);
new ObjectMapper() new ObjectMapper()
.readValue(body, DatasourceList.class) .readValue(body, DatasourceList.class)
.forEach(ds-> { .forEach(ds -> {
try { try {
System.out.println(new ObjectMapper().writeValueAsString(ds)); System.out.println(new ObjectMapper().writeValueAsString(ds));
} catch (JsonProcessingException e) { } catch (JsonProcessingException e) {
throw new RuntimeException(e); throw new RuntimeException(e);
} }
}); });
; ;
} }
@ -68,24 +70,33 @@ public class QueryCommunityAPITest {
void validCommunities() throws Exception { void validCommunities() throws Exception {
CommunityConfiguration cc = Utils.getCommunityConfiguration(); CommunityConfiguration cc = Utils.getCommunityConfiguration();
System.out.println(cc.getCommunities().keySet()); System.out.println(cc.getCommunities().keySet());
Community community =cc.getCommunities().get("aurora"); Community community = cc.getCommunities().get("aurora");
Assertions.assertEquals(0, community.getSubjects().size()); Assertions.assertEquals(0, community.getSubjects().size());
Assertions.assertEquals(null, community.getConstraints()); Assertions.assertEquals(null, community.getConstraints());
Assertions.assertEquals(null, community.getRemoveConstraints()); Assertions.assertEquals(null, community.getRemoveConstraints());
Assertions.assertEquals(2, community.getZenodoCommunities().size()); Assertions.assertEquals(2, community.getZenodoCommunities().size());
Assertions.assertTrue(community.getZenodoCommunities().stream().anyMatch(c -> c.equals("aurora-universities-network"))); Assertions
Assertions.assertTrue(community.getZenodoCommunities().stream().anyMatch(c -> c.equals("university-of-innsbruck"))); .assertTrue(
community.getZenodoCommunities().stream().anyMatch(c -> c.equals("aurora-universities-network")));
Assertions
.assertTrue(community.getZenodoCommunities().stream().anyMatch(c -> c.equals("university-of-innsbruck")));
Assertions.assertEquals(35, community.getProviders().size()); Assertions.assertEquals(35, community.getProviders().size());
Assertions.assertEquals(35, community.getProviders().stream().filter(p->p.getSelectionConstraints()==null).count()); Assertions
.assertEquals(
35, community.getProviders().stream().filter(p -> p.getSelectionConstraints() == null).count());
} }
@Test @Test
void getCommunityProjects() throws Exception { void getCommunityProjects() throws Exception {
CommunityEntityMap projectMap = Utils.getCommunityProjects(); CommunityEntityMap projectMap = Utils.getCommunityProjects();
Assertions.assertFalse(projectMap.containsKey("mes")); Assertions.assertFalse(projectMap.containsKey("mes"));
Assertions.assertEquals(33, projectMap.size()); Assertions.assertEquals(33, projectMap.size());
Assertions.assertTrue(projectMap.keySet().stream().allMatch(k -> projectMap.get(k).stream().allMatch(p -> p.startsWith("40|")))); Assertions
.assertTrue(
projectMap
.keySet()
.stream()
.allMatch(k -> projectMap.get(k).stream().allMatch(p -> p.startsWith("40|"))));
} }
} }