[ENRICHMENT][BETA] Use of community API in enrichment process AND addition to tagging result for communities through projects #359

Merged
claudio.atzori merged 22 commits from propagationapi into beta 2023-11-30 14:20:34 +01:00
21 changed files with 268 additions and 239 deletions
Showing only changes of commit a3d01ccb24 - Show all commits

View File

@ -7,7 +7,6 @@ import java.io.IOException;
import java.io.Serializable; import java.io.Serializable;
import java.util.*; import java.util.*;
import eu.dnetlib.dhp.schema.oaf.utils.*;
import org.apache.commons.cli.ParseException; import org.apache.commons.cli.ParseException;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Text;
@ -30,6 +29,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.action.AtomicAction; import eu.dnetlib.dhp.schema.action.AtomicAction;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.utils.*;
import eu.dnetlib.dhp.utils.DHPUtils; import eu.dnetlib.dhp.utils.DHPUtils;
import scala.Tuple2; import scala.Tuple2;

View File

@ -33,15 +33,15 @@ case class mappingAuthor(
case class funderInfo(id: String, uri: String, name: String, synonym: List[String]) {} case class funderInfo(id: String, uri: String, name: String, synonym: List[String]) {}
case class mappingFunder(name: String, DOI: Option[String], award: Option[List[String]]) {} case class mappingFunder(name: String, DOI: Option[String], award: Option[List[String]]) {}
case object Crossref2Oaf { case object Crossref2Oaf {
val logger: Logger = LoggerFactory.getLogger(Crossref2Oaf.getClass) val logger: Logger = LoggerFactory.getLogger(Crossref2Oaf.getClass)
val irishFunder: List[funderInfo] = { val irishFunder: List[funderInfo] = {
val s = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/crossref/irish_funder.json")).mkString val s = Source
.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/doiboost/crossref/irish_funder.json"))
.mkString
implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats
lazy val json: org.json4s.JValue = parse(s) lazy val json: org.json4s.JValue = parse(s)
json.extract[List[funderInfo]] json.extract[List[funderInfo]]
@ -102,7 +102,9 @@ case object Crossref2Oaf {
def getIrishId(doi: String): Option[String] = { def getIrishId(doi: String): Option[String] = {
val id = doi.split("/").last val id = doi.split("/").last
irishFunder.find(f => id.equalsIgnoreCase(f.id) || (f.synonym.nonEmpty && f.synonym.exists(s => s.equalsIgnoreCase(id)))).map(f => f.id) irishFunder
.find(f => id.equalsIgnoreCase(f.id) || (f.synonym.nonEmpty && f.synonym.exists(s => s.equalsIgnoreCase(id))))
.map(f => f.id)
} }
def mappingResult(result: Result, json: JValue, cobjCategory: String): Result = { def mappingResult(result: Result, json: JValue, cobjCategory: String): Result = {

View File

@ -74,5 +74,4 @@ public class QueryCommunityAPI {
return body; return body;
} }
} }

View File

@ -1,16 +1,6 @@
package eu.dnetlib.dhp.api; package eu.dnetlib.dhp.api;
import com.amazonaws.util.StringUtils;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.Maps;
import eu.dnetlib.dhp.api.model.*;
import eu.dnetlib.dhp.bulktag.community.Community;
import eu.dnetlib.dhp.bulktag.community.CommunityConfiguration;
import eu.dnetlib.dhp.bulktag.community.Provider;
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
import eu.dnetlib.dhp.bulktag.criteria.VerbResolverFactory;
import javax.management.Query;
import java.io.IOException; import java.io.IOException;
import java.io.Serializable; import java.io.Serializable;
import java.util.ArrayList; import java.util.ArrayList;
@ -19,6 +9,19 @@ import java.util.Map;
import java.util.Objects; import java.util.Objects;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import javax.management.Query;
import com.amazonaws.util.StringUtils;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.Maps;
import eu.dnetlib.dhp.api.model.*;
import eu.dnetlib.dhp.bulktag.community.Community;
import eu.dnetlib.dhp.bulktag.community.CommunityConfiguration;
import eu.dnetlib.dhp.bulktag.community.Provider;
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
import eu.dnetlib.dhp.bulktag.criteria.VerbResolverFactory;
/** /**
* @author miriam.baglioni * @author miriam.baglioni
* @Date 09/10/23 * @Date 09/10/23
@ -33,7 +36,8 @@ public class Utils implements Serializable {
getValidCommunities() getValidCommunities()
.forEach(community -> { .forEach(community -> {
try { try {
CommunityModel cm = MAPPER.readValue(QueryCommunityAPI.community(community.getId()), CommunityModel.class); CommunityModel cm = MAPPER
.readValue(QueryCommunityAPI.community(community.getId()), CommunityModel.class);
validCommunities.add(getCommunity(cm)); validCommunities.add(getCommunity(cm));
} catch (IOException e) { } catch (IOException e) {
throw new RuntimeException(e); throw new RuntimeException(e);
@ -41,7 +45,8 @@ public class Utils implements Serializable {
}); });
validCommunities.forEach(community -> { validCommunities.forEach(community -> {
try { try {
DatasourceList dl = MAPPER.readValue(QueryCommunityAPI.communityDatasource(community.getId()), DatasourceList.class); DatasourceList dl = MAPPER
.readValue(QueryCommunityAPI.communityDatasource(community.getId()), DatasourceList.class);
community.setProviders(dl.stream().map(d -> { community.setProviders(dl.stream().map(d -> {
// if(d.getEnabled() == null || Boolean.FALSE.equals(d.getEnabled())) // if(d.getEnabled() == null || Boolean.FALSE.equals(d.getEnabled()))
// return null; // return null;
@ -85,19 +90,23 @@ public class Utils implements Serializable {
} }
public static List<CommunityModel> getValidCommunities() throws IOException { public static List<CommunityModel> getValidCommunities() throws IOException {
return MAPPER.readValue(QueryCommunityAPI.communities(), CommunitySummary.class) return MAPPER
.readValue(QueryCommunityAPI.communities(), CommunitySummary.class)
.stream() .stream()
.filter(community -> !community.getStatus().equals("hidden") && .filter(
community -> !community.getStatus().equals("hidden") &&
(community.getType().equals("ri") || community.getType().equals("community"))) (community.getType().equals("ri") || community.getType().equals("community")))
.collect(Collectors.toList()); .collect(Collectors.toList());
} }
public static CommunityEntityMap getCommunityOrganization() throws IOException { public static CommunityEntityMap getCommunityOrganization() throws IOException {
CommunityEntityMap organizationMap = new CommunityEntityMap(); CommunityEntityMap organizationMap = new CommunityEntityMap();
getValidCommunities() getValidCommunities()
.forEach(community -> { .forEach(community -> {
String id = community.getId(); String id = community.getId();
try { try {
List<String> associatedOrgs = MAPPER.readValue(QueryCommunityAPI.communityPropagationOrganization(id), OrganizationList.class); List<String> associatedOrgs = MAPPER
.readValue(QueryCommunityAPI.communityPropagationOrganization(id), OrganizationList.class);
if (associatedOrgs.size() > 0) { if (associatedOrgs.size() > 0) {
organizationMap.put(id, associatedOrgs); organizationMap.put(id, associatedOrgs);
} }
@ -119,11 +128,14 @@ public class Utils implements Serializable {
do { do {
page++; page++;
try { try {
cm = MAPPER.readValue( QueryCommunityAPI.communityProjects(community.getId(), String.valueOf(page), String.valueOf(size)), ContentModel.class); cm = MAPPER
.readValue(
QueryCommunityAPI
.communityProjects(community.getId(), String.valueOf(page), String.valueOf(size)),
ContentModel.class);
if (cm.getContent().size() > 0) { if (cm.getContent().size() > 0) {
cm.getContent().forEach(p -> cm.getContent().forEach(p -> projectList.add("40|" + p.getOpenaireId()));
projectList.add ("40|" + p.getOpenaireId()));
projectMap.put(community.getId(), projectList); projectMap.put(community.getId(), projectList);
} }
} catch (IOException e) { } catch (IOException e) {

View File

@ -1,10 +1,11 @@
package eu.dnetlib.dhp.api.model; package eu.dnetlib.dhp.api.model;
import com.fasterxml.jackson.annotation.JsonAutoDetect; import com.fasterxml.jackson.annotation.JsonAutoDetect;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.google.gson.Gson; import com.google.gson.Gson;
import eu.dnetlib.dhp.bulktag.community.SelectionConstraints;
import eu.dnetlib.dhp.bulktag.community.SelectionConstraints;
@JsonAutoDetect @JsonAutoDetect
@JsonIgnoreProperties(ignoreUnknown = true) @JsonIgnoreProperties(ignoreUnknown = true)
@ -30,7 +31,6 @@ public class CommunityContentprovider {
this.openaireId = openaireId; this.openaireId = openaireId;
} }
public SelectionConstraints getSelectioncriteria() { public SelectionConstraints getSelectioncriteria() {
return this.selectioncriteria; return this.selectioncriteria;

View File

@ -5,8 +5,8 @@ import java.io.Serializable;
import java.util.List; import java.util.List;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import eu.dnetlib.dhp.bulktag.community.SelectionConstraints;
import eu.dnetlib.dhp.bulktag.community.SelectionConstraints;
/** /**
* @author miriam.baglioni * @author miriam.baglioni

View File

@ -1,3 +1,4 @@
package eu.dnetlib.dhp.api.model; package eu.dnetlib.dhp.api.model;
import java.io.Serializable; import java.io.Serializable;
@ -12,5 +13,3 @@ public class CommunitySummary extends ArrayList<CommunityModel> implements Seria
super(); super();
} }
} }

View File

@ -1,10 +1,11 @@
package eu.dnetlib.dhp.api.model;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties; package eu.dnetlib.dhp.api.model;
import java.io.Serializable; import java.io.Serializable;
import java.util.List; import java.util.List;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
/** /**
* @author miriam.baglioni * @author miriam.baglioni
* @Date 09/10/23 * @Date 09/10/23

View File

@ -1,10 +1,11 @@
package eu.dnetlib.dhp.api.model; package eu.dnetlib.dhp.api.model;
import eu.dnetlib.dhp.api.model.CommunityContentprovider;
import java.io.Serializable; import java.io.Serializable;
import java.util.ArrayList; import java.util.ArrayList;
import eu.dnetlib.dhp.api.model.CommunityContentprovider;
public class DatasourceList extends ArrayList<CommunityContentprovider> implements Serializable { public class DatasourceList extends ArrayList<CommunityContentprovider> implements Serializable {
public DatasourceList() { public DatasourceList() {
super(); super();

View File

@ -1,3 +1,4 @@
package eu.dnetlib.dhp.api.model; package eu.dnetlib.dhp.api.model;
import java.io.Serializable; import java.io.Serializable;

View File

@ -1,9 +1,10 @@
package eu.dnetlib.dhp.api.model; package eu.dnetlib.dhp.api.model;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import java.io.Serializable; import java.io.Serializable;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
/** /**
* @author miriam.baglioni * @author miriam.baglioni
* @Date 09/10/23 * @Date 09/10/23

View File

@ -6,7 +6,6 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.util.*; import java.util.*;
import eu.dnetlib.dhp.api.Utils;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.api.java.function.FilterFunction;
@ -21,6 +20,7 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.gson.Gson; import com.google.gson.Gson;
import eu.dnetlib.dhp.api.Utils;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.bulktag.community.*; import eu.dnetlib.dhp.bulktag.community.*;
import eu.dnetlib.dhp.schema.oaf.Datasource; import eu.dnetlib.dhp.schema.oaf.Datasource;

View File

@ -8,7 +8,6 @@ import java.util.Optional;
import com.google.gson.Gson; import com.google.gson.Gson;
/** Created by miriam on 01/08/2018. */ /** Created by miriam on 01/08/2018. */
public class Community implements Serializable { public class Community implements Serializable {

View File

@ -5,6 +5,7 @@ import java.io.StringReader;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;

View File

@ -4,9 +4,10 @@ package eu.dnetlib.dhp.bulktag.community;
import java.io.Serializable; import java.io.Serializable;
import java.lang.reflect.InvocationTargetException; import java.lang.reflect.InvocationTargetException;
import org.apache.htrace.fasterxml.jackson.annotation.JsonIgnore;
import eu.dnetlib.dhp.bulktag.criteria.Selection; import eu.dnetlib.dhp.bulktag.criteria.Selection;
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
import org.apache.htrace.fasterxml.jackson.annotation.JsonIgnore;
public class Constraint implements Serializable { public class Constraint implements Serializable {
private String verb; private String verb;
@ -39,6 +40,7 @@ public class Constraint implements Serializable {
public void setValue(String value) { public void setValue(String value) {
this.value = value; this.value = value;
} }
//@JsonIgnore //@JsonIgnore
// public void setSelection(Selection sel) { // public void setSelection(Selection sel) {
// selection = sel; // selection = sel;
@ -54,5 +56,4 @@ public class Constraint implements Serializable {
return selection.apply(metadata); return selection.apply(metadata);
} }
} }

View File

@ -12,6 +12,7 @@ import com.google.gson.Gson;
import com.google.gson.reflect.TypeToken; import com.google.gson.reflect.TypeToken;
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
@JsonAutoDetect @JsonAutoDetect
public class SelectionConstraints implements Serializable { public class SelectionConstraints implements Serializable {
private List<Constraints> criteria; private List<Constraints> criteria;

View File

@ -6,8 +6,6 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession;
import java.util.*; import java.util.*;
import eu.dnetlib.dhp.api.Utils;
import eu.dnetlib.dhp.api.model.CommunityEntityMap;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.hadoop.io.compress.GzipCodec; import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.spark.SparkConf; import org.apache.spark.SparkConf;
@ -20,6 +18,8 @@ import org.slf4j.LoggerFactory;
import com.google.gson.Gson; import com.google.gson.Gson;
import eu.dnetlib.dhp.api.Utils;
import eu.dnetlib.dhp.api.model.CommunityEntityMap;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Relation;

View File

@ -1,14 +1,8 @@
package eu.dnetlib.dhp.bulktag; package eu.dnetlib.dhp.bulktag;
import eu.dnetlib.dhp.api.Utils; import java.util.List;
import eu.dnetlib.dhp.api.model.CommunityEntityMap;
import eu.dnetlib.dhp.bulktag.community.Community;
import eu.dnetlib.dhp.bulktag.community.CommunityConfiguration;
import eu.dnetlib.dhp.api.model.CommunityModel;
import eu.dnetlib.dhp.api.model.CommunitySummary;
import eu.dnetlib.dhp.api.model.DatasourceList;
import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
@ -16,8 +10,13 @@ import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.api.QueryCommunityAPI; import eu.dnetlib.dhp.api.QueryCommunityAPI;
import eu.dnetlib.dhp.api.Utils;
import java.util.List; import eu.dnetlib.dhp.api.model.CommunityEntityMap;
import eu.dnetlib.dhp.api.model.CommunityModel;
import eu.dnetlib.dhp.api.model.CommunitySummary;
import eu.dnetlib.dhp.api.model.DatasourceList;
import eu.dnetlib.dhp.bulktag.community.Community;
import eu.dnetlib.dhp.bulktag.community.CommunityConfiguration;
/** /**
* @author miriam.baglioni * @author miriam.baglioni
@ -43,9 +42,12 @@ public class QueryCommunityAPITest {
void community() throws Exception { void community() throws Exception {
String id = "dh-ch"; String id = "dh-ch";
String body = QueryCommunityAPI.community(id); String body = QueryCommunityAPI.community(id);
System.out.println(new ObjectMapper().writeValueAsString(new ObjectMapper() System.out
.readValue(body, CommunityModel.class))) .println(
; new ObjectMapper()
.writeValueAsString(
new ObjectMapper()
.readValue(body, CommunityModel.class)));
} }
@Test @Test
@ -73,10 +75,15 @@ public class QueryCommunityAPITest {
Assertions.assertEquals(null, community.getConstraints()); Assertions.assertEquals(null, community.getConstraints());
Assertions.assertEquals(null, community.getRemoveConstraints()); Assertions.assertEquals(null, community.getRemoveConstraints());
Assertions.assertEquals(2, community.getZenodoCommunities().size()); Assertions.assertEquals(2, community.getZenodoCommunities().size());
Assertions.assertTrue(community.getZenodoCommunities().stream().anyMatch(c -> c.equals("aurora-universities-network"))); Assertions
Assertions.assertTrue(community.getZenodoCommunities().stream().anyMatch(c -> c.equals("university-of-innsbruck"))); .assertTrue(
community.getZenodoCommunities().stream().anyMatch(c -> c.equals("aurora-universities-network")));
Assertions
.assertTrue(community.getZenodoCommunities().stream().anyMatch(c -> c.equals("university-of-innsbruck")));
Assertions.assertEquals(35, community.getProviders().size()); Assertions.assertEquals(35, community.getProviders().size());
Assertions.assertEquals(35, community.getProviders().stream().filter(p->p.getSelectionConstraints()==null).count()); Assertions
.assertEquals(
35, community.getProviders().stream().filter(p -> p.getSelectionConstraints() == null).count());
} }
@Test @Test
@ -84,8 +91,12 @@ public class QueryCommunityAPITest {
CommunityEntityMap projectMap = Utils.getCommunityProjects(); CommunityEntityMap projectMap = Utils.getCommunityProjects();
Assertions.assertFalse(projectMap.containsKey("mes")); Assertions.assertFalse(projectMap.containsKey("mes"));
Assertions.assertEquals(33, projectMap.size()); Assertions.assertEquals(33, projectMap.size());
Assertions.assertTrue(projectMap.keySet().stream().allMatch(k -> projectMap.get(k).stream().allMatch(p -> p.startsWith("40|")))); Assertions
.assertTrue(
projectMap
.keySet()
.stream()
.allMatch(k -> projectMap.get(k).stream().allMatch(p -> p.startsWith("40|"))));
} }
} }