From a3d01ccb246e52e7977aef8c57b15d964dcc9834 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Mon, 9 Oct 2023 14:52:17 +0200 Subject: [PATCH] refactoring --- .../CreateActionSetSparkJob.java | 2 +- .../doiboost/crossref/Crossref2Oaf.scala | 16 +- .../crossref/CrossrefMappingTest.scala | 2 +- .../eu/dnetlib/dhp/api/QueryCommunityAPI.java | 9 +- .../main/java/eu/dnetlib/dhp/api/Utils.java | 236 +++++++++--------- .../api/model/CommunityContentprovider.java | 6 +- .../dnetlib/dhp/api/model/CommunityModel.java | 2 +- .../dhp/api/model/CommunitySummary.java | 11 +- .../dnetlib/dhp/api/model/ContentModel.java | 61 ++--- .../dnetlib/dhp/api/model/DatasourceList.java | 11 +- .../dhp/api/model/OrganizationList.java | 7 +- .../dnetlib/dhp/api/model/ProjectModel.java | 19 +- .../dnetlib/dhp/bulktag/SparkBulkTagJob.java | 4 +- .../dhp/bulktag/community/Community.java | 1 - .../CommunityConfigurationFactory.java | 1 + .../dhp/bulktag/community/Constraint.java | 9 +- .../community/SelectionConstraints.java | 1 + .../PrepareResultCommunitySet.java | 4 +- .../dnetlib/dhp/bulktag/BulkTagJobTest.java | 38 +-- .../CommunityConfigurationFactoryTest.java | 2 +- .../dhp/bulktag/QueryCommunityAPITest.java | 65 +++-- 21 files changed, 268 insertions(+), 239 deletions(-) diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java index a367ba852..b707fdcd3 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/opencitations/CreateActionSetSparkJob.java @@ -7,7 +7,6 @@ import java.io.IOException; import java.io.Serializable; import java.util.*; -import eu.dnetlib.dhp.schema.oaf.utils.*; import org.apache.commons.cli.ParseException; import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.Text; @@ -30,6 +29,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.action.AtomicAction; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.*; import eu.dnetlib.dhp.utils.DHPUtils; import scala.Tuple2; diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala index e0fdb9ce4..565d34e62 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala @@ -31,9 +31,7 @@ case class mappingAuthor( affiliation: Option[mappingAffiliation] ) {} -case class funderInfo(id:String,uri:String, name:String,synonym:List[String] ) {} - - +case class funderInfo(id: String, uri: String, name: String, synonym: List[String]) {} case class mappingFunder(name: String, DOI: Option[String], award: Option[List[String]]) {} @@ -41,7 +39,9 @@ case object Crossref2Oaf { val logger: Logger = LoggerFactory.getLogger(Crossref2Oaf.getClass) val irishFunder: List[funderInfo] = { - val s = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/crossref/irish_funder.json")).mkString + val s = Source + .fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/crossref/irish_funder.json")) + .mkString implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats lazy val json: org.json4s.JValue = parse(s) json.extract[List[funderInfo]] @@ -100,9 +100,11 @@ case object Crossref2Oaf { "report" -> "0017 Report" ) - def getIrishId(doi:String):Option[String] = { - val id =doi.split("/").last - irishFunder.find(f => id.equalsIgnoreCase(f.id) || (f.synonym.nonEmpty && f.synonym.exists(s => s.equalsIgnoreCase(id)))).map(f => f.id) + def getIrishId(doi: String): Option[String] = { + val id = doi.split("/").last + irishFunder + .find(f => id.equalsIgnoreCase(f.id) || (f.synonym.nonEmpty && f.synonym.exists(s => s.equalsIgnoreCase(id)))) + .map(f => f.id) } def mappingResult(result: Result, json: JValue, cobjCategory: String): Result = { diff --git a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala index 7961376c5..fbf6f72c0 100644 --- a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala @@ -50,7 +50,7 @@ class CrossrefMappingTest { } } - def checkRelation(generatedOAF: List[Oaf]): Unit = { + def checkRelation(generatedOAF: List[Oaf]): Unit = { val rels: List[Relation] = generatedOAF.filter(p => p.isInstanceOf[Relation]).asInstanceOf[List[Relation]] diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/QueryCommunityAPI.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/QueryCommunityAPI.java index 635ee2027..cc615ba46 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/QueryCommunityAPI.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/QueryCommunityAPI.java @@ -21,7 +21,7 @@ import com.google.gson.Gson; public class QueryCommunityAPI { private static final String baseUrl = "https://services.openaire.eu/openaire/"; - private static String get(String geturl) throws IOException{ + private static String get(String geturl) throws IOException { URL url = new URL(geturl); HttpURLConnection conn = (HttpURLConnection) url.openConnection(); conn.setDoOutput(true); @@ -44,7 +44,7 @@ public class QueryCommunityAPI { return get(baseUrl + "community/" + id); } - public static String communityDatasource(String id)throws IOException{ + public static String communityDatasource(String id) throws IOException { return get(baseUrl + "community/" + id + "/contentproviders"); } @@ -53,8 +53,8 @@ public class QueryCommunityAPI { return get(baseUrl + "community/" + id + "/propagationOrganizations"); } - public static String communityProjects(String id, String page, String size) throws IOException{ - return get(baseUrl + "community/" + id +"/projects/" + page + "/" + size); + public static String communityProjects(String id, String page, String size) throws IOException { + return get(baseUrl + "community/" + id + "/projects/" + page + "/" + size); } @NotNull @@ -74,5 +74,4 @@ public class QueryCommunityAPI { return body; } - } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java index 5b3004a5d..c1aaa14c4 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java @@ -1,16 +1,6 @@ + package eu.dnetlib.dhp.api; -import com.amazonaws.util.StringUtils; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.collect.Maps; -import eu.dnetlib.dhp.api.model.*; -import eu.dnetlib.dhp.bulktag.community.Community; -import eu.dnetlib.dhp.bulktag.community.CommunityConfiguration; -import eu.dnetlib.dhp.bulktag.community.Provider; -import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; -import eu.dnetlib.dhp.bulktag.criteria.VerbResolverFactory; - -import javax.management.Query; import java.io.IOException; import java.io.Serializable; import java.util.ArrayList; @@ -19,118 +9,140 @@ import java.util.Map; import java.util.Objects; import java.util.stream.Collectors; +import javax.management.Query; + +import com.amazonaws.util.StringUtils; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.Maps; + +import eu.dnetlib.dhp.api.model.*; +import eu.dnetlib.dhp.bulktag.community.Community; +import eu.dnetlib.dhp.bulktag.community.CommunityConfiguration; +import eu.dnetlib.dhp.bulktag.community.Provider; +import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; +import eu.dnetlib.dhp.bulktag.criteria.VerbResolverFactory; + /** * @author miriam.baglioni * @Date 09/10/23 */ public class Utils implements Serializable { - private static final ObjectMapper MAPPER = new ObjectMapper(); - private static final VerbResolver resolver = VerbResolverFactory.newInstance(); + private static final ObjectMapper MAPPER = new ObjectMapper(); + private static final VerbResolver resolver = VerbResolverFactory.newInstance(); - public static CommunityConfiguration getCommunityConfiguration() throws IOException { - final Map communities = Maps.newHashMap(); - List validCommunities = new ArrayList<>(); - getValidCommunities() - .forEach(community -> { - try { - CommunityModel cm = MAPPER.readValue(QueryCommunityAPI.community(community.getId()), CommunityModel.class); - validCommunities.add(getCommunity(cm)); - } catch (IOException e) { - throw new RuntimeException(e); - } - }); - validCommunities.forEach(community ->{ - try { - DatasourceList dl = MAPPER.readValue(QueryCommunityAPI.communityDatasource(community.getId()), DatasourceList.class); - community.setProviders(dl.stream().map(d -> { + public static CommunityConfiguration getCommunityConfiguration() throws IOException { + final Map communities = Maps.newHashMap(); + List validCommunities = new ArrayList<>(); + getValidCommunities() + .forEach(community -> { + try { + CommunityModel cm = MAPPER + .readValue(QueryCommunityAPI.community(community.getId()), CommunityModel.class); + validCommunities.add(getCommunity(cm)); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + validCommunities.forEach(community -> { + try { + DatasourceList dl = MAPPER + .readValue(QueryCommunityAPI.communityDatasource(community.getId()), DatasourceList.class); + community.setProviders(dl.stream().map(d -> { // if(d.getEnabled() == null || Boolean.FALSE.equals(d.getEnabled())) // return null; - Provider p = new Provider(); - p.setOpenaireId("10|" + d.getOpenaireId()); - p.setSelectionConstraints(d.getSelectioncriteria()); - if(p.getSelectionConstraints() != null) - p.getSelectionConstraints().setSelection(resolver); - return p; - }) - .filter(Objects::nonNull) - .collect(Collectors.toList())); - } catch (IOException e) { - throw new RuntimeException(e); - } - }); + Provider p = new Provider(); + p.setOpenaireId("10|" + d.getOpenaireId()); + p.setSelectionConstraints(d.getSelectioncriteria()); + if (p.getSelectionConstraints() != null) + p.getSelectionConstraints().setSelection(resolver); + return p; + }) + .filter(Objects::nonNull) + .collect(Collectors.toList())); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); - validCommunities.forEach(community ->{ - if(community.isValid()) - communities.put(community.getId(), community); - }); - return new CommunityConfiguration(communities); - } + validCommunities.forEach(community -> { + if (community.isValid()) + communities.put(community.getId(), community); + }); + return new CommunityConfiguration(communities); + } - private static Community getCommunity(CommunityModel cm){ - Community c = new Community(); - c.setId(cm.getId()); - c.setZenodoCommunities(cm.getOtherZenodoCommunities()); - if(!StringUtils.isNullOrEmpty(cm.getZenodoCommunity())) - c.getZenodoCommunities().add(cm.getZenodoCommunity()); - c.setSubjects(cm.getSubjects()); - c.getSubjects().addAll(cm.getFos()); - c.getSubjects().addAll(cm.getSdg()); - c.setConstraints(cm.getAdvancedConstraints()); - if(c.getConstraints()!=null) - c.getConstraints().setSelection(resolver); - c.setRemoveConstraints(cm.getRemoveConstraints()); - if(c.getRemoveConstraints()!=null) - c.getRemoveConstraints().setSelection(resolver); - return c; - } + private static Community getCommunity(CommunityModel cm) { + Community c = new Community(); + c.setId(cm.getId()); + c.setZenodoCommunities(cm.getOtherZenodoCommunities()); + if (!StringUtils.isNullOrEmpty(cm.getZenodoCommunity())) + c.getZenodoCommunities().add(cm.getZenodoCommunity()); + c.setSubjects(cm.getSubjects()); + c.getSubjects().addAll(cm.getFos()); + c.getSubjects().addAll(cm.getSdg()); + c.setConstraints(cm.getAdvancedConstraints()); + if (c.getConstraints() != null) + c.getConstraints().setSelection(resolver); + c.setRemoveConstraints(cm.getRemoveConstraints()); + if (c.getRemoveConstraints() != null) + c.getRemoveConstraints().setSelection(resolver); + return c; + } - public static List getValidCommunities() throws IOException { - return MAPPER.readValue(QueryCommunityAPI.communities(), CommunitySummary.class) - .stream() - .filter(community -> !community.getStatus().equals("hidden") && - (community.getType().equals("ri") || community.getType().equals("community"))) - .collect(Collectors.toList()); - } - public static CommunityEntityMap getCommunityOrganization() throws IOException { - CommunityEntityMap organizationMap = new CommunityEntityMap(); - getValidCommunities() - .forEach(community -> { - String id = community.getId(); - try { - List associatedOrgs = MAPPER.readValue(QueryCommunityAPI.communityPropagationOrganization(id), OrganizationList.class); - if(associatedOrgs.size() >0){ - organizationMap.put(id, associatedOrgs); - } - } catch (IOException e) { - throw new RuntimeException(e); - } - }); - return organizationMap; - } + public static List getValidCommunities() throws IOException { + return MAPPER + .readValue(QueryCommunityAPI.communities(), CommunitySummary.class) + .stream() + .filter( + community -> !community.getStatus().equals("hidden") && + (community.getType().equals("ri") || community.getType().equals("community"))) + .collect(Collectors.toList()); + } - public static CommunityEntityMap getCommunityProjects()throws IOException{ - CommunityEntityMap projectMap = new CommunityEntityMap(); - getValidCommunities() - .forEach(community ->{ - int page = -1; - int size = 100; - ContentModel cm = new ContentModel(); - List projectList = new ArrayList<>(); - do{ - page ++; - try { - cm = MAPPER.readValue( QueryCommunityAPI.communityProjects(community.getId(), String.valueOf(page), String.valueOf(size)), ContentModel.class); - if (cm.getContent().size() > 0){ + public static CommunityEntityMap getCommunityOrganization() throws IOException { + CommunityEntityMap organizationMap = new CommunityEntityMap(); + getValidCommunities() + .forEach(community -> { + String id = community.getId(); + try { + List associatedOrgs = MAPPER + .readValue(QueryCommunityAPI.communityPropagationOrganization(id), OrganizationList.class); + if (associatedOrgs.size() > 0) { + organizationMap.put(id, associatedOrgs); + } + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + return organizationMap; + } - cm.getContent().forEach(p -> - projectList.add ("40|" + p.getOpenaireId())); - projectMap.put(community.getId(), projectList); - } - } catch (IOException e) { - throw new RuntimeException(e); - } - }while (!cm.getLast()); - }); - return projectMap; - } + public static CommunityEntityMap getCommunityProjects() throws IOException { + CommunityEntityMap projectMap = new CommunityEntityMap(); + getValidCommunities() + .forEach(community -> { + int page = -1; + int size = 100; + ContentModel cm = new ContentModel(); + List projectList = new ArrayList<>(); + do { + page++; + try { + cm = MAPPER + .readValue( + QueryCommunityAPI + .communityProjects(community.getId(), String.valueOf(page), String.valueOf(size)), + ContentModel.class); + if (cm.getContent().size() > 0) { + + cm.getContent().forEach(p -> projectList.add("40|" + p.getOpenaireId())); + projectMap.put(community.getId(), projectList); + } + } catch (IOException e) { + throw new RuntimeException(e); + } + } while (!cm.getLast()); + }); + return projectMap; + } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunityContentprovider.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunityContentprovider.java index 5378e556a..9fab5a80c 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunityContentprovider.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunityContentprovider.java @@ -1,10 +1,11 @@ + package eu.dnetlib.dhp.api.model; import com.fasterxml.jackson.annotation.JsonAutoDetect; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.google.gson.Gson; -import eu.dnetlib.dhp.bulktag.community.SelectionConstraints; +import eu.dnetlib.dhp.bulktag.community.SelectionConstraints; @JsonAutoDetect @JsonIgnoreProperties(ignoreUnknown = true) @@ -30,10 +31,9 @@ public class CommunityContentprovider { this.openaireId = openaireId; } - public SelectionConstraints getSelectioncriteria() { - return this.selectioncriteria; + return this.selectioncriteria; } public void setSelectioncriteria(SelectionConstraints selectioncriteria) { diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunityModel.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunityModel.java index c5b3fac7c..745e7efc2 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunityModel.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunityModel.java @@ -5,8 +5,8 @@ import java.io.Serializable; import java.util.List; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import eu.dnetlib.dhp.bulktag.community.SelectionConstraints; +import eu.dnetlib.dhp.bulktag.community.SelectionConstraints; /** * @author miriam.baglioni diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunitySummary.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunitySummary.java index 774b6c874..a0541f7ee 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunitySummary.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunitySummary.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.api.model; import java.io.Serializable; @@ -8,9 +9,7 @@ import java.util.ArrayList; * @Date 06/10/23 */ public class CommunitySummary extends ArrayList implements Serializable { - public CommunitySummary() { - super(); - } - } - - + public CommunitySummary() { + super(); + } +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/ContentModel.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/ContentModel.java index a755a9455..469709f59 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/ContentModel.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/ContentModel.java @@ -1,50 +1,51 @@ -package eu.dnetlib.dhp.api.model; -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +package eu.dnetlib.dhp.api.model; import java.io.Serializable; import java.util.List; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; + /** * @author miriam.baglioni * @Date 09/10/23 */ @JsonIgnoreProperties(ignoreUnknown = true) public class ContentModel implements Serializable { - private List content; - private Integer totalPages; - private Boolean last; - private Integer number; + private List content; + private Integer totalPages; + private Boolean last; + private Integer number; - public List getContent() { - return content; - } + public List getContent() { + return content; + } - public void setContent(List content) { - this.content = content; - } + public void setContent(List content) { + this.content = content; + } - public Integer getTotalPages() { - return totalPages; - } + public Integer getTotalPages() { + return totalPages; + } - public void setTotalPages(Integer totalPages) { - this.totalPages = totalPages; - } + public void setTotalPages(Integer totalPages) { + this.totalPages = totalPages; + } - public Boolean getLast() { - return last; - } + public Boolean getLast() { + return last; + } - public void setLast(Boolean last) { - this.last = last; - } + public void setLast(Boolean last) { + this.last = last; + } - public Integer getNumber() { - return number; - } + public Integer getNumber() { + return number; + } - public void setNumber(Integer number) { - this.number = number; - } + public void setNumber(Integer number) { + this.number = number; + } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/DatasourceList.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/DatasourceList.java index 8e31d7612..30d0241c3 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/DatasourceList.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/DatasourceList.java @@ -1,12 +1,13 @@ + package eu.dnetlib.dhp.api.model; -import eu.dnetlib.dhp.api.model.CommunityContentprovider; - - import java.io.Serializable; import java.util.ArrayList; + +import eu.dnetlib.dhp.api.model.CommunityContentprovider; + public class DatasourceList extends ArrayList implements Serializable { - public DatasourceList(){ + public DatasourceList() { super(); } -} \ No newline at end of file +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/OrganizationList.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/OrganizationList.java index 6895967ff..3c81ad179 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/OrganizationList.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/OrganizationList.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.api.model; import java.io.Serializable; @@ -9,7 +10,7 @@ import java.util.ArrayList; */ public class OrganizationList extends ArrayList implements Serializable { - public OrganizationList(){ - super(); - } + public OrganizationList() { + super(); + } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/ProjectModel.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/ProjectModel.java index 50bdcf649..3495d6a63 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/ProjectModel.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/ProjectModel.java @@ -1,9 +1,10 @@ + package eu.dnetlib.dhp.api.model; -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; - import java.io.Serializable; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; + /** * @author miriam.baglioni * @Date 09/10/23 @@ -11,13 +12,13 @@ import java.io.Serializable; @JsonIgnoreProperties(ignoreUnknown = true) public class ProjectModel implements Serializable { - private String openaireId; + private String openaireId; - public String getOpenaireId() { - return openaireId; - } + public String getOpenaireId() { + return openaireId; + } - public void setOpenaireId(String openaireId) { - this.openaireId = openaireId; - } + public void setOpenaireId(String openaireId) { + this.openaireId = openaireId; + } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java index b61bc2c9f..0d98e4958 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java @@ -6,7 +6,6 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.util.*; -import eu.dnetlib.dhp.api.Utils; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FilterFunction; @@ -21,6 +20,7 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.gson.Gson; +import eu.dnetlib.dhp.api.Utils; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.bulktag.community.*; import eu.dnetlib.dhp.schema.oaf.Datasource; @@ -87,7 +87,7 @@ public class SparkBulkTagJob { if (isTest) { cc = CommunityConfigurationFactory.newInstance(taggingConf); } else { - cc = Utils.getCommunityConfiguration();//QueryInformationSystem.getCommunityConfiguration(parser.get("isLookUpUrl")); + cc = Utils.getCommunityConfiguration();// QueryInformationSystem.getCommunityConfiguration(parser.get("isLookUpUrl")); } runWithSparkSession( diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Community.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Community.java index d281f8783..9cd3a8f82 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Community.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Community.java @@ -8,7 +8,6 @@ import java.util.Optional; import com.google.gson.Gson; - /** Created by miriam on 01/08/2018. */ public class Community implements Serializable { diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java index 013bf7168..955ca3856 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java @@ -5,6 +5,7 @@ import java.io.StringReader; import java.util.ArrayList; import java.util.List; import java.util.Map; + import org.apache.commons.lang3.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java index 03ae1117b..82a6a3b85 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java @@ -4,9 +4,10 @@ package eu.dnetlib.dhp.bulktag.community; import java.io.Serializable; import java.lang.reflect.InvocationTargetException; +import org.apache.htrace.fasterxml.jackson.annotation.JsonIgnore; + import eu.dnetlib.dhp.bulktag.criteria.Selection; import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; -import org.apache.htrace.fasterxml.jackson.annotation.JsonIgnore; public class Constraint implements Serializable { private String verb; @@ -39,11 +40,12 @@ public class Constraint implements Serializable { public void setValue(String value) { this.value = value; } + //@JsonIgnore - //public void setSelection(Selection sel) { + // public void setSelection(Selection sel) { // selection = sel; // } -@JsonIgnore + @JsonIgnore public void setSelection(VerbResolver resolver) throws InvocationTargetException, NoSuchMethodException, InstantiationException, IllegalAccessException { @@ -54,5 +56,4 @@ public class Constraint implements Serializable { return selection.apply(metadata); } - } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java index 8e6d25e9b..57cc658fc 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java @@ -12,6 +12,7 @@ import com.google.gson.Gson; import com.google.gson.reflect.TypeToken; import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; + @JsonAutoDetect public class SelectionConstraints implements Serializable { private List criteria; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java index ff6b73286..e0670b80f 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java @@ -6,8 +6,6 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import java.util.*; -import eu.dnetlib.dhp.api.Utils; -import eu.dnetlib.dhp.api.model.CommunityEntityMap; import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; @@ -20,6 +18,8 @@ import org.slf4j.LoggerFactory; import com.google.gson.Gson; +import eu.dnetlib.dhp.api.Utils; +import eu.dnetlib.dhp.api.model.CommunityEntityMap; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation; diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java index 745cd7e6f..11dad9055 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java @@ -1572,36 +1572,36 @@ public class BulkTagJobTest { void newConfTest() throws Exception { final String pathMap = BulkTagJobTest.pathMap; SparkBulkTagJob - .main( - new String[] { - "-isTest", Boolean.TRUE.toString(), - "-isSparkSessionManaged", Boolean.FALSE.toString(), - "-sourcePath", - getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/no_updates").getPath(), - "-taggingConf", taggingConf, - "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset", - "-outputPath", workingDir.toString() + "/dataset", - "-isLookUpUrl", MOCK_IS_LOOK_UP_URL, - "-pathMap", pathMap - }); + .main( + new String[] { + "-isTest", Boolean.TRUE.toString(), + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-sourcePath", + getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/no_updates").getPath(), + "-taggingConf", taggingConf, + "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset", + "-outputPath", workingDir.toString() + "/dataset", + "-isLookUpUrl", MOCK_IS_LOOK_UP_URL, + "-pathMap", pathMap + }); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); JavaRDD tmp = sc - .textFile(workingDir.toString() + "/dataset") - .map(item -> OBJECT_MAPPER.readValue(item, Dataset.class)); + .textFile(workingDir.toString() + "/dataset") + .map(item -> OBJECT_MAPPER.readValue(item, Dataset.class)); Assertions.assertEquals(10, tmp.count()); org.apache.spark.sql.Dataset verificationDataset = spark - .createDataset(tmp.rdd(), Encoders.bean(Dataset.class)); + .createDataset(tmp.rdd(), Encoders.bean(Dataset.class)); verificationDataset.createOrReplaceTempView("dataset"); String query = "select id, MyT.id community " - + "from dataset " - + "lateral view explode(context) c as MyT " - + "lateral view explode(MyT.datainfo) d as MyD " - + "where MyD.inferenceprovenance = 'bulktagging'"; + + "from dataset " + + "lateral view explode(context) c as MyT " + + "lateral view explode(MyT.datainfo) d as MyD " + + "where MyD.inferenceprovenance = 'bulktagging'"; Assertions.assertEquals(0, spark.sql(query).count()); } diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java index 7e12bf9c3..5f0b1d7f1 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java @@ -47,7 +47,7 @@ class CommunityConfigurationFactoryTest { sc.setVerb("not_contains"); sc.setField("contributor"); sc.setValue("DARIAH"); - sc.setSelection(resolver);//.getSelectionCriteria(sc.getVerb(), sc.getValue())); + sc.setSelection(resolver);// .getSelectionCriteria(sc.getVerb(), sc.getValue())); String metadata = "This work has been partially supported by DARIAH-EU infrastructure"; Assertions.assertFalse(sc.verifyCriteria(metadata)); } diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/QueryCommunityAPITest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/QueryCommunityAPITest.java index 1b3403535..b0043d1e8 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/QueryCommunityAPITest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/QueryCommunityAPITest.java @@ -1,14 +1,8 @@ package eu.dnetlib.dhp.bulktag; -import eu.dnetlib.dhp.api.Utils; -import eu.dnetlib.dhp.api.model.CommunityEntityMap; -import eu.dnetlib.dhp.bulktag.community.Community; -import eu.dnetlib.dhp.bulktag.community.CommunityConfiguration; +import java.util.List; -import eu.dnetlib.dhp.api.model.CommunityModel; -import eu.dnetlib.dhp.api.model.CommunitySummary; -import eu.dnetlib.dhp.api.model.DatasourceList; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -16,8 +10,13 @@ import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.api.QueryCommunityAPI; - -import java.util.List; +import eu.dnetlib.dhp.api.Utils; +import eu.dnetlib.dhp.api.model.CommunityEntityMap; +import eu.dnetlib.dhp.api.model.CommunityModel; +import eu.dnetlib.dhp.api.model.CommunitySummary; +import eu.dnetlib.dhp.api.model.DatasourceList; +import eu.dnetlib.dhp.bulktag.community.Community; +import eu.dnetlib.dhp.bulktag.community.CommunityConfiguration; /** * @author miriam.baglioni @@ -43,9 +42,12 @@ public class QueryCommunityAPITest { void community() throws Exception { String id = "dh-ch"; String body = QueryCommunityAPI.community(id); - System.out.println(new ObjectMapper().writeValueAsString(new ObjectMapper() - .readValue(body, CommunityModel.class))) - ; + System.out + .println( + new ObjectMapper() + .writeValueAsString( + new ObjectMapper() + .readValue(body, CommunityModel.class))); } @Test @@ -53,14 +55,14 @@ public class QueryCommunityAPITest { String id = "dh-ch"; String body = QueryCommunityAPI.communityDatasource(id); new ObjectMapper() - .readValue(body, DatasourceList.class) - .forEach(ds-> { - try { - System.out.println(new ObjectMapper().writeValueAsString(ds)); - } catch (JsonProcessingException e) { - throw new RuntimeException(e); - } - }); + .readValue(body, DatasourceList.class) + .forEach(ds -> { + try { + System.out.println(new ObjectMapper().writeValueAsString(ds)); + } catch (JsonProcessingException e) { + throw new RuntimeException(e); + } + }); ; } @@ -68,24 +70,33 @@ public class QueryCommunityAPITest { void validCommunities() throws Exception { CommunityConfiguration cc = Utils.getCommunityConfiguration(); System.out.println(cc.getCommunities().keySet()); - Community community =cc.getCommunities().get("aurora"); + Community community = cc.getCommunities().get("aurora"); Assertions.assertEquals(0, community.getSubjects().size()); Assertions.assertEquals(null, community.getConstraints()); Assertions.assertEquals(null, community.getRemoveConstraints()); Assertions.assertEquals(2, community.getZenodoCommunities().size()); - Assertions.assertTrue(community.getZenodoCommunities().stream().anyMatch(c -> c.equals("aurora-universities-network"))); - Assertions.assertTrue(community.getZenodoCommunities().stream().anyMatch(c -> c.equals("university-of-innsbruck"))); + Assertions + .assertTrue( + community.getZenodoCommunities().stream().anyMatch(c -> c.equals("aurora-universities-network"))); + Assertions + .assertTrue(community.getZenodoCommunities().stream().anyMatch(c -> c.equals("university-of-innsbruck"))); Assertions.assertEquals(35, community.getProviders().size()); - Assertions.assertEquals(35, community.getProviders().stream().filter(p->p.getSelectionConstraints()==null).count()); + Assertions + .assertEquals( + 35, community.getProviders().stream().filter(p -> p.getSelectionConstraints() == null).count()); } @Test void getCommunityProjects() throws Exception { CommunityEntityMap projectMap = Utils.getCommunityProjects(); Assertions.assertFalse(projectMap.containsKey("mes")); - Assertions.assertEquals(33, projectMap.size()); - Assertions.assertTrue(projectMap.keySet().stream().allMatch(k -> projectMap.get(k).stream().allMatch(p -> p.startsWith("40|")))); + Assertions.assertEquals(33, projectMap.size()); + Assertions + .assertTrue( + projectMap + .keySet() + .stream() + .allMatch(k -> projectMap.get(k).stream().allMatch(p -> p.startsWith("40|")))); } } -