1
0
Fork 0

[bulktag subcommunities] refactoring and addition of new properties

This commit is contained in:
Miriam Baglioni 2024-12-20 09:06:55 +01:00
parent 2570023590
commit 2d45f125a7
12 changed files with 305 additions and 258 deletions

View File

@ -44,15 +44,12 @@ public class QueryCommunityAPI {
} }
public static String communityDatasource(String id, String baseURL) throws IOException { public static String communityDatasource(String id, String baseURL) throws IOException {
return get(baseURL + id + "/datasources"); return get(baseURL + id + "/datasources");
} }
public static String communityPropagationOrganization(String id, String baseURL) throws IOException { public static String communityPropagationOrganization(String id, String baseURL) throws IOException {
return get(baseURL + id + "/propagationOrganizations"); return get(baseURL + id + "/propagationOrganizations");
@ -86,16 +83,21 @@ public class QueryCommunityAPI {
return body; return body;
} }
public static String subcommunityDatasource(String communityId, String subcommunityId, String baseURL) throws IOException { public static String subcommunityDatasource(String communityId, String subcommunityId, String baseURL)
throws IOException {
return get(baseURL + communityId + "/subcommunities/datasources?subCommunityId=" + subcommunityId); return get(baseURL + communityId + "/subcommunities/datasources?subCommunityId=" + subcommunityId);
} }
public static String subcommunityPropagationOrganization(String communityId, String subcommunityId , String baseURL) throws IOException { public static String subcommunityPropagationOrganization(String communityId, String subcommunityId, String baseURL)
throws IOException {
return get(baseURL + communityId + "/subcommunities/propagationOrganizations?subCommunityId=" + subcommunityId); return get(baseURL + communityId + "/subcommunities/propagationOrganizations?subCommunityId=" + subcommunityId);
} }
public static String subcommunityProjects(String communityId, String subcommunityId, String page, String size, String baseURL) throws IOException { public static String subcommunityProjects(String communityId, String subcommunityId, String page, String size,
return get(baseURL + communityId + "/subcommunities/projects/" + page + "/" + size + "?subCommunityId=" + subcommunityId); String baseURL) throws IOException {
return get(
baseURL + communityId + "/subcommunities/projects/" + page + "/" + size + "?subCommunityId="
+ subcommunityId);
} }
public static String propagationDatasourceCommunityMap(String baseURL) throws IOException { public static String propagationDatasourceCommunityMap(String baseURL) throws IOException {

View File

@ -6,10 +6,10 @@ import java.io.Serializable;
import java.util.*; import java.util.*;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import com.fasterxml.jackson.core.type.TypeReference;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.NotNull;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.Maps; import com.google.common.collect.Maps;
@ -38,69 +38,69 @@ public class Utils implements Serializable {
} }
@FunctionalInterface @FunctionalInterface
private interface DatasourceQueryFunction{ private interface DatasourceQueryFunction {
String query(); String query();
} }
//PROJECT METHODS // PROJECT METHODS
public static CommunityEntityMap getProjectCommunityMap(String baseURL) throws IOException { public static CommunityEntityMap getProjectCommunityMap(String baseURL) throws IOException {
CommunityEntityMap projectMap = new CommunityEntityMap(); CommunityEntityMap projectMap = new CommunityEntityMap();
getValidCommunities(baseURL) getValidCommunities(baseURL)
.forEach(community -> { .forEach(community -> {
addRelevantProjects(community.getId(), baseURL, projectMap); addRelevantProjects(community.getId(), baseURL, projectMap);
try { try {
List<SubCommunityModel> subcommunities = getSubcommunities(community.getId(), baseURL); List<SubCommunityModel> subcommunities = getSubcommunities(community.getId(), baseURL);
subcommunities.forEach(sc -> addRelevantProjects(community.getId(), sc.getSubCommunityId(), baseURL, projectMap)); subcommunities
} catch (IOException e) { .forEach(
throw new RuntimeException(e); sc -> addRelevantProjects(community.getId(), sc.getSubCommunityId(), baseURL, projectMap));
} } catch (IOException e) {
}); throw new RuntimeException(e);
}
});
return projectMap; return projectMap;
} }
private static void addRelevantProjects( private static void addRelevantProjects(
String communityId, String communityId,
String baseURL, String baseURL,
CommunityEntityMap communityEntityMap CommunityEntityMap communityEntityMap) {
) {
fetchAndProcessProjects( fetchAndProcessProjects(
(page, size) -> { (page, size) -> {
try { try {
return QueryCommunityAPI.communityProjects(communityId, String.valueOf(page), String.valueOf(size), baseURL); return QueryCommunityAPI
} catch (IOException e) { .communityProjects(communityId, String.valueOf(page), String.valueOf(size), baseURL);
throw new RuntimeException(e); } catch (IOException e) {
} throw new RuntimeException(e);
}, }
communityId, },
communityEntityMap communityId,
); communityEntityMap);
} }
private static void addRelevantProjects( private static void addRelevantProjects(
String communityId, String communityId,
String subcommunityId, String subcommunityId,
String baseURL, String baseURL,
CommunityEntityMap communityEntityMap CommunityEntityMap communityEntityMap) {
) {
fetchAndProcessProjects( fetchAndProcessProjects(
(page, size) -> { (page, size) -> {
try { try {
return QueryCommunityAPI.subcommunityProjects(communityId, subcommunityId, String.valueOf(page), String.valueOf(size), baseURL); return QueryCommunityAPI
} catch (IOException e) { .subcommunityProjects(
throw new RuntimeException(e); communityId, subcommunityId, String.valueOf(page), String.valueOf(size), baseURL);
} } catch (IOException e) {
}, throw new RuntimeException(e);
communityId, }
communityEntityMap },
); communityId,
communityEntityMap);
} }
private static void fetchAndProcessProjects( private static void fetchAndProcessProjects(
ProjectQueryFunction projectQueryFunction, ProjectQueryFunction projectQueryFunction,
String communityId, String communityId,
CommunityEntityMap communityEntityMap CommunityEntityMap communityEntityMap) {
) {
int page = 0; int page = 0;
final int size = 100; final int size = 100;
ContentModel contentModel; ContentModel contentModel;
@ -111,9 +111,13 @@ public class Utils implements Serializable {
contentModel = MAPPER.readValue(response, ContentModel.class); contentModel = MAPPER.readValue(response, ContentModel.class);
if (!contentModel.getContent().isEmpty()) { if (!contentModel.getContent().isEmpty()) {
contentModel.getContent().forEach(project ->communityEntityMap.add( contentModel
ModelSupport.getIdPrefix(Project.class) + "|" + project.getOpenaireId(), communityId) .getContent()
); .forEach(
project -> communityEntityMap
.add(
ModelSupport.getIdPrefix(Project.class) + "|" + project.getOpenaireId(),
communityId));
} }
} catch (IOException e) { } catch (IOException e) {
throw new RuntimeException("Error processing projects for community: " + communityId, e); throw new RuntimeException("Error processing projects for community: " + communityId, e);
@ -123,28 +127,28 @@ public class Utils implements Serializable {
} }
private static List<Provider> getCommunityContentProviders( private static List<Provider> getCommunityContentProviders(
DatasourceQueryFunction datasourceQueryFunction DatasourceQueryFunction datasourceQueryFunction) {
) { try {
try { String response = datasourceQueryFunction.query();
String response = datasourceQueryFunction.query(); List<CommunityContentprovider> datasourceList = MAPPER
List<CommunityContentprovider> datasourceList = MAPPER.readValue(response, new TypeReference<List<CommunityContentprovider>>() { .readValue(response, new TypeReference<List<CommunityContentprovider>>() {
}); });
return datasourceList.stream().map(d -> { return datasourceList.stream().map(d -> {
if (d.getEnabled() == null || Boolean.FALSE.equals(d.getEnabled())) if (d.getEnabled() == null || Boolean.FALSE.equals(d.getEnabled()))
return null; return null;
Provider p = new Provider(); Provider p = new Provider();
p.setOpenaireId(ModelSupport.getIdPrefix(Datasource.class) + "|" + d.getOpenaireId()); p.setOpenaireId(ModelSupport.getIdPrefix(Datasource.class) + "|" + d.getOpenaireId());
p.setSelectionConstraints(d.getSelectioncriteria()); p.setSelectionConstraints(d.getSelectioncriteria());
if (p.getSelectionConstraints() != null) if (p.getSelectionConstraints() != null)
p.getSelectionConstraints().setSelection(resolver); p.getSelectionConstraints().setSelection(resolver);
return p; return p;
}) })
.filter(Objects::nonNull) .filter(Objects::nonNull)
.collect(Collectors.toList()); .collect(Collectors.toList());
} catch (IOException e) { } catch (IOException e) {
throw new RuntimeException("Error processing datasource information: " + e); throw new RuntimeException("Error processing datasource information: " + e);
} }
} }
@ -156,13 +160,14 @@ public class Utils implements Serializable {
*/ */
public static List<CommunityModel> getValidCommunities(String baseURL) throws IOException { public static List<CommunityModel> getValidCommunities(String baseURL) throws IOException {
List<CommunityModel> listCommunity = MAPPER List<CommunityModel> listCommunity = MAPPER
.readValue(QueryCommunityAPI.communities(baseURL), new TypeReference<List<CommunityModel>>() { .readValue(QueryCommunityAPI.communities(baseURL), new TypeReference<List<CommunityModel>>() {
}); });
return listCommunity.stream() return listCommunity
.filter( .stream()
community -> !community.getStatus().equals("hidden") && .filter(
(community.getType().equals("ri") || community.getType().equals("community"))) community -> !community.getStatus().equals("hidden") &&
.collect(Collectors.toList()); (community.getType().equals("ri") || community.getType().equals("community")))
.collect(Collectors.toList());
} }
/** /**
@ -172,16 +177,16 @@ public class Utils implements Serializable {
* @return the community set with information from the community model and for the content providers * @return the community set with information from the community model and for the content providers
*/ */
private static Community getCommunity(String baseURL, CommunityModel communityModel) { private static Community getCommunity(String baseURL, CommunityModel communityModel) {
Community community = getCommunity(communityModel); Community community = getCommunity(communityModel);
community.setProviders(getCommunityContentProviders(()->{ community.setProviders(getCommunityContentProviders(() -> {
try { try {
return QueryCommunityAPI.communityDatasource(community.getId(),baseURL); return QueryCommunityAPI.communityDatasource(community.getId(), baseURL);
} catch (IOException e) { } catch (IOException e) {
throw new RuntimeException(e); throw new RuntimeException(e);
} }
})); }));
return community; return community;
} }
/** /**
@ -191,9 +196,10 @@ public class Utils implements Serializable {
* @param sc * @param sc
* @return * @return
*/ */
private static @NotNull Community getSubCommunityConfiguration(String baseURL, String communityId, SubCommunityModel sc) { private static @NotNull Community getSubCommunityConfiguration(String baseURL, String communityId,
SubCommunityModel sc) {
Community c = getCommunity(sc); Community c = getCommunity(sc);
c.setProviders(getCommunityContentProviders(()->{ c.setProviders(getCommunityContentProviders(() -> {
try { try {
return QueryCommunityAPI.subcommunityDatasource(communityId, sc.getSubCommunityId(), baseURL); return QueryCommunityAPI.subcommunityDatasource(communityId, sc.getSubCommunityId(), baseURL);
} catch (IOException e) { } catch (IOException e) {
@ -210,12 +216,13 @@ public class Utils implements Serializable {
* @param baseURL * @param baseURL
* @return * @return
*/ */
private static List<Community> getSubCommunity(String communityId, String baseURL){ private static List<Community> getSubCommunity(String communityId, String baseURL) {
try { try {
List<SubCommunityModel> subcommunities = getSubcommunities(communityId, baseURL); List<SubCommunityModel> subcommunities = getSubcommunities(communityId, baseURL);
return subcommunities.stream().map(sc -> return subcommunities
getSubCommunityConfiguration(baseURL, communityId, sc)) .stream()
.collect(Collectors.toList()); .map(sc -> getSubCommunityConfiguration(baseURL, communityId, sc))
.collect(Collectors.toList());
} catch (IOException e) { } catch (IOException e) {
throw new RuntimeException(e); throw new RuntimeException(e);
} }
@ -244,22 +251,21 @@ public class Utils implements Serializable {
return new CommunityConfiguration(communities); return new CommunityConfiguration(communities);
} }
/** /**
* fills the common fields in the community model for both the community configuration and the subcommunity configuration * fills the common fields in the community model for both the community configuration and the subcommunity configuration
* @param input * @param input
* @return * @return
* @param <C> * @param <C>
*/ */
private static <C extends CommonConfigurationModel> Community getCommonConfiguration(C input){ private static <C extends CommonConfigurationModel> Community getCommonConfiguration(C input) {
Community c = new Community(); Community c = new Community();
c.setZenodoCommunities(input.getOtherZenodoCommunities()); c.setZenodoCommunities(input.getOtherZenodoCommunities());
if (StringUtils.isNotBlank(input.getZenodoCommunity())) if (StringUtils.isNotBlank(input.getZenodoCommunity()))
c.getZenodoCommunities().add(input.getZenodoCommunity()); c.getZenodoCommunities().add(input.getZenodoCommunity());
c.setSubjects(input.getSubjects()); c.setSubjects(input.getSubjects());
if(input.getFos() != null) if (input.getFos() != null)
c.getSubjects().addAll(input.getFos()); c.getSubjects().addAll(input.getFos());
if(input.getSdg()!=null) if (input.getSdg() != null)
c.getSubjects().addAll(input.getSdg()); c.getSubjects().addAll(input.getSdg());
if (input.getAdvancedConstraints() != null) { if (input.getAdvancedConstraints() != null) {
c.setConstraints(input.getAdvancedConstraints()); c.setConstraints(input.getAdvancedConstraints());
@ -287,38 +293,49 @@ public class Utils implements Serializable {
} }
public static List<SubCommunityModel> getSubcommunities(String communityId, String baseURL) throws IOException { public static List<SubCommunityModel> getSubcommunities(String communityId, String baseURL) throws IOException {
return MAPPER.readValue(QueryCommunityAPI.subcommunities(communityId, baseURL), new TypeReference<List<SubCommunityModel>>() { return MAPPER
}); .readValue(
QueryCommunityAPI.subcommunities(communityId, baseURL), new TypeReference<List<SubCommunityModel>>() {
});
} }
public static CommunityEntityMap getOrganizationCommunityMap(String baseURL) throws IOException { public static CommunityEntityMap getOrganizationCommunityMap(String baseURL) throws IOException {
return MAPPER.readValue(QueryCommunityAPI.propagationOrganizationCommunityMap(baseURL), CommunityEntityMap.class); return MAPPER
.readValue(QueryCommunityAPI.propagationOrganizationCommunityMap(baseURL), CommunityEntityMap.class);
} }
public static CommunityEntityMap getDatasourceCommunityMap(String baseURL) throws IOException { public static CommunityEntityMap getDatasourceCommunityMap(String baseURL) throws IOException {
return MAPPER.readValue(QueryCommunityAPI.propagationDatasourceCommunityMap(baseURL), CommunityEntityMap.class); return MAPPER.readValue(QueryCommunityAPI.propagationDatasourceCommunityMap(baseURL), CommunityEntityMap.class);
} }
private static void getRelatedOrganizations(String communityId, String baseURL, CommunityEntityMap communityEntityMap){ private static void getRelatedOrganizations(String communityId, String baseURL,
CommunityEntityMap communityEntityMap) {
try { try {
List<String> associatedOrgs = MAPPER List<String> associatedOrgs = MAPPER
.readValue( .readValue(
QueryCommunityAPI.communityPropagationOrganization(communityId, baseURL), EntityIdentifierList.class); QueryCommunityAPI.communityPropagationOrganization(communityId, baseURL),
associatedOrgs.forEach(o -> communityEntityMap.add(ModelSupport.getIdPrefix(Organization.class) + "|" + o, communityId )); EntityIdentifierList.class);
associatedOrgs
.forEach(
o -> communityEntityMap.add(ModelSupport.getIdPrefix(Organization.class) + "|" + o, communityId));
} catch (IOException e) { } catch (IOException e) {
throw new RuntimeException(e); throw new RuntimeException(e);
} }
} }
private static void getRelatedOrganizations(String communityId, String subcommunityId, String baseURL, CommunityEntityMap communityEntityMap){ private static void getRelatedOrganizations(String communityId, String subcommunityId, String baseURL,
CommunityEntityMap communityEntityMap) {
try { try {
List<String> associatedOrgs = MAPPER List<String> associatedOrgs = MAPPER
.readValue( .readValue(
QueryCommunityAPI.subcommunityPropagationOrganization(communityId, subcommunityId, baseURL), EntityIdentifierList.class); QueryCommunityAPI.subcommunityPropagationOrganization(communityId, subcommunityId, baseURL),
associatedOrgs.forEach(o -> communityEntityMap.add(ModelSupport.getIdPrefix(Organization.class) + "|" + o, communityId )); EntityIdentifierList.class);
associatedOrgs
.forEach(
o -> communityEntityMap.add(ModelSupport.getIdPrefix(Organization.class) + "|" + o, communityId));
} catch (IOException e) { } catch (IOException e) {
throw new RuntimeException(e); throw new RuntimeException(e);
} }
@ -331,37 +348,39 @@ public class Utils implements Serializable {
public static CommunityEntityMap getCommunityOrganization(String baseURL) throws IOException { public static CommunityEntityMap getCommunityOrganization(String baseURL) throws IOException {
CommunityEntityMap organizationMap = new CommunityEntityMap(); CommunityEntityMap organizationMap = new CommunityEntityMap();
List<CommunityModel> communityList = getValidCommunities(baseURL); List<CommunityModel> communityList = getValidCommunities(baseURL);
communityList.forEach(community -> { communityList.forEach(community -> {
getRelatedOrganizations(community.getId(), baseURL, organizationMap ); getRelatedOrganizations(community.getId(), baseURL, organizationMap);
try { try {
List<SubCommunityModel> subcommunities = getSubcommunities(community.getId(), baseURL); List<SubCommunityModel> subcommunities = getSubcommunities(community.getId(), baseURL);
subcommunities.forEach(sc -> getRelatedOrganizations(community.getId(), sc.getSubCommunityId(), baseURL, organizationMap)); subcommunities
} catch (IOException e) { .forEach(
throw new RuntimeException(e); sc -> getRelatedOrganizations(
} community.getId(), sc.getSubCommunityId(), baseURL, organizationMap));
}); } catch (IOException e) {
throw new RuntimeException(e);
}
});
return organizationMap; return organizationMap;
} }
public static List<String> getCommunityIdList(String baseURL) throws IOException { public static List<String> getCommunityIdList(String baseURL) throws IOException {
return getValidCommunities(baseURL) return getValidCommunities(baseURL)
.stream() .stream()
.flatMap(communityModel -> { .flatMap(communityModel -> {
List<String> communityIds = new ArrayList<>(); List<String> communityIds = new ArrayList<>();
communityIds.add(communityModel.getId()); communityIds.add(communityModel.getId());
try { try {
Utils.getSubcommunities(communityModel.getId(), baseURL).forEach(sc -> communityIds.add(sc.getSubCommunityId())); Utils
} catch (IOException e) { .getSubcommunities(communityModel.getId(), baseURL)
throw new RuntimeException(e); .forEach(sc -> communityIds.add(sc.getSubCommunityId()));
} } catch (IOException e) {
return communityIds.stream(); throw new RuntimeException(e);
}) }
return communityIds.stream();
})
.collect(Collectors.toList()); .collect(Collectors.toList());
} }
} }

View File

@ -1,74 +1,76 @@
package eu.dnetlib.dhp.api.model;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties; package eu.dnetlib.dhp.api.model;
import eu.dnetlib.dhp.bulktag.community.SelectionConstraints;
import java.io.Serializable; import java.io.Serializable;
import java.util.List; import java.util.List;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import eu.dnetlib.dhp.bulktag.community.SelectionConstraints;
@JsonIgnoreProperties(ignoreUnknown = true) @JsonIgnoreProperties(ignoreUnknown = true)
public class CommonConfigurationModel implements Serializable { public class CommonConfigurationModel implements Serializable {
private String zenodoCommunity; private String zenodoCommunity;
private List<String> subjects; private List<String> subjects;
private List<String> otherZenodoCommunities; private List<String> otherZenodoCommunities;
private List<String> fos; private List<String> fos;
private List<String> sdg; private List<String> sdg;
private SelectionConstraints advancedConstraints; private SelectionConstraints advancedConstraints;
private SelectionConstraints removeConstraints; private SelectionConstraints removeConstraints;
public String getZenodoCommunity() { public String getZenodoCommunity() {
return zenodoCommunity; return zenodoCommunity;
} }
public void setZenodoCommunity(String zenodoCommunity) { public void setZenodoCommunity(String zenodoCommunity) {
this.zenodoCommunity = zenodoCommunity; this.zenodoCommunity = zenodoCommunity;
} }
public List<String> getSubjects() { public List<String> getSubjects() {
return subjects; return subjects;
} }
public void setSubjects(List<String> subjects) { public void setSubjects(List<String> subjects) {
this.subjects = subjects; this.subjects = subjects;
} }
public List<String> getOtherZenodoCommunities() { public List<String> getOtherZenodoCommunities() {
return otherZenodoCommunities; return otherZenodoCommunities;
} }
public void setOtherZenodoCommunities(List<String> otherZenodoCommunities) { public void setOtherZenodoCommunities(List<String> otherZenodoCommunities) {
this.otherZenodoCommunities = otherZenodoCommunities; this.otherZenodoCommunities = otherZenodoCommunities;
} }
public List<String> getFos() { public List<String> getFos() {
return fos; return fos;
} }
public void setFos(List<String> fos) { public void setFos(List<String> fos) {
this.fos = fos; this.fos = fos;
} }
public List<String> getSdg() { public List<String> getSdg() {
return sdg; return sdg;
} }
public void setSdg(List<String> sdg) { public void setSdg(List<String> sdg) {
this.sdg = sdg; this.sdg = sdg;
} }
public SelectionConstraints getRemoveConstraints() { public SelectionConstraints getRemoveConstraints() {
return removeConstraints; return removeConstraints;
} }
public void setRemoveConstraints(SelectionConstraints removeConstraints) { public void setRemoveConstraints(SelectionConstraints removeConstraints) {
this.removeConstraints = removeConstraints; this.removeConstraints = removeConstraints;
} }
public SelectionConstraints getAdvancedConstraints() { public SelectionConstraints getAdvancedConstraints() {
return advancedConstraints; return advancedConstraints;
} }
public void setAdvancedConstraints(SelectionConstraints advancedConstraints) { public void setAdvancedConstraints(SelectionConstraints advancedConstraints) {
this.advancedConstraints = advancedConstraints; this.advancedConstraints = advancedConstraints;
} }
} }

View File

@ -19,12 +19,11 @@ public class CommunityEntityMap extends HashMap<String, List<String>> {
return super.get(key); return super.get(key);
} }
public void add(String key, String value){ public void add(String key, String value) {
if(!super.containsKey(key)){ if (!super.containsKey(key)) {
super.put(key, new ArrayList<>()); super.put(key, new ArrayList<>());
} }
super.get(key).add(value); super.get(key).add(value);
} }
} }

View File

@ -1,18 +1,19 @@
package eu.dnetlib.dhp.api.model;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties; package eu.dnetlib.dhp.api.model;
import java.io.Serializable; import java.io.Serializable;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
@JsonIgnoreProperties(ignoreUnknown = true) @JsonIgnoreProperties(ignoreUnknown = true)
public class SubCommunityModel extends CommonConfigurationModel implements Serializable { public class SubCommunityModel extends CommonConfigurationModel implements Serializable {
private String subCommunityId; private String subCommunityId;
public String getSubCommunityId() { public String getSubCommunityId() {
return subCommunityId; return subCommunityId;
} }
public void setSubCommunityId(String subCommunityId) { public void setSubCommunityId(String subCommunityId) {
this.subCommunityId = subCommunityId; this.subCommunityId = subCommunityId;
} }
} }

View File

@ -8,8 +8,6 @@ import java.nio.charset.StandardCharsets;
import java.util.*; import java.util.*;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import eu.dnetlib.dhp.common.action.ReadDatasourceMasterDuplicateFromDB;
import eu.dnetlib.dhp.common.action.model.MasterDuplicate;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
@ -31,6 +29,8 @@ import eu.dnetlib.dhp.api.model.CommunityEntityMap;
import eu.dnetlib.dhp.api.model.EntityCommunities; import eu.dnetlib.dhp.api.model.EntityCommunities;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.bulktag.community.*; import eu.dnetlib.dhp.bulktag.community.*;
import eu.dnetlib.dhp.common.action.ReadDatasourceMasterDuplicateFromDB;
import eu.dnetlib.dhp.common.action.model.MasterDuplicate;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.*;
@ -96,7 +96,6 @@ public class SparkBulkTagJob {
log.info("dbPassword: {}", dbPassword); log.info("dbPassword: {}", dbPassword);
final String hdfsPath = outputPath + "masterDuplicate"; final String hdfsPath = outputPath + "masterDuplicate";
log.info("hdfsPath: {}", hdfsPath); log.info("hdfsPath: {}", hdfsPath);
SparkConf conf = new SparkConf(); SparkConf conf = new SparkConf();
CommunityConfiguration cc; CommunityConfiguration cc;
@ -123,69 +122,85 @@ public class SparkBulkTagJob {
spark, inputPath, outputPath, protoMap, cc); spark, inputPath, outputPath, protoMap, cc);
execEntityTag( execEntityTag(
spark, inputPath + "organization", outputPath + "organization", spark, inputPath + "organization", outputPath + "organization",
mapWithRepresentativeOrganization(spark, inputPath + "relation", Utils.getOrganizationCommunityMap(baseURL)), mapWithRepresentativeOrganization(
Organization.class, TaggingConstants.CLASS_ID_ORGANIZATION, spark, inputPath + "relation", Utils.getOrganizationCommunityMap(baseURL)),
Organization.class, TaggingConstants.CLASS_ID_ORGANIZATION,
TaggingConstants.CLASS_NAME_BULKTAG_ORGANIZATION); TaggingConstants.CLASS_NAME_BULKTAG_ORGANIZATION);
execEntityTag( execEntityTag(
spark, inputPath + "project", outputPath + "project", spark, inputPath + "project", outputPath + "project",
Utils.getProjectCommunityMap(baseURL), Utils.getProjectCommunityMap(baseURL),
Project.class, TaggingConstants.CLASS_ID_PROJECT, TaggingConstants.CLASS_NAME_BULKTAG_PROJECT); Project.class, TaggingConstants.CLASS_ID_PROJECT, TaggingConstants.CLASS_NAME_BULKTAG_PROJECT);
execEntityTag( execEntityTag(
spark, inputPath + "datasource", outputPath + "datasource", spark, inputPath + "datasource", outputPath + "datasource",
mapWithMasterDatasource(spark, hdfsPath, Utils.getDatasourceCommunityMap(baseURL)), mapWithMasterDatasource(spark, hdfsPath, Utils.getDatasourceCommunityMap(baseURL)),
Datasource.class, TaggingConstants.CLASS_ID_DATASOURCE, TaggingConstants.CLASS_NAME_BULKTAG_DATASOURCE); Datasource.class, TaggingConstants.CLASS_ID_DATASOURCE,
TaggingConstants.CLASS_NAME_BULKTAG_DATASOURCE);
}); });
} }
private static CommunityEntityMap mapWithMasterDatasource(SparkSession spark, String masterDuplicatePath, CommunityEntityMap datasourceCommunityMap) { private static CommunityEntityMap mapWithMasterDatasource(SparkSession spark, String masterDuplicatePath,
//load master-duplicate relations CommunityEntityMap datasourceCommunityMap) {
Dataset<MasterDuplicate> masterDuplicate = spark.read().schema(Encoders.bean(MasterDuplicate.class).schema()) // load master-duplicate relations
.json(masterDuplicatePath).as(Encoders.bean(MasterDuplicate.class)); Dataset<MasterDuplicate> masterDuplicate = spark
//list of id for the communities related entities .read()
.schema(Encoders.bean(MasterDuplicate.class).schema())
.json(masterDuplicatePath)
.as(Encoders.bean(MasterDuplicate.class));
// list of id for the communities related entities
List<String> idList = entityIdList(ModelSupport.idPrefixMap.get(Datasource.class), datasourceCommunityMap); List<String> idList = entityIdList(ModelSupport.idPrefixMap.get(Datasource.class), datasourceCommunityMap);
//find the mapping with the representative entity if any // find the mapping with the representative entity if any
Dataset<String> datasourceIdentifiers = spark.createDataset(idList, Encoders.STRING()); Dataset<String> datasourceIdentifiers = spark.createDataset(idList, Encoders.STRING());
List<Row> mappedKeys = datasourceIdentifiers.join(masterDuplicate, datasourceIdentifiers.col("_1").equalTo(masterDuplicate.col("duplicateId")), "left_semi") List<Row> mappedKeys = datasourceIdentifiers
.selectExpr("masterId as source", "duplicateId as target").collectAsList(); .join(
masterDuplicate, datasourceIdentifiers.col("_1").equalTo(masterDuplicate.col("duplicateId")),
"left_semi")
.selectExpr("masterId as source", "duplicateId as target")
.collectAsList();
//remap the entity with its corresponding representative // remap the entity with its corresponding representative
return remapCommunityEntityMap(datasourceCommunityMap,mappedKeys); return remapCommunityEntityMap(datasourceCommunityMap, mappedKeys);
} }
private static List<String> entityIdList(String idPrefixMap, CommunityEntityMap datasourceCommunityMap) { private static List<String> entityIdList(String idPrefixMap, CommunityEntityMap datasourceCommunityMap) {
final String prefix = idPrefixMap + "|"; final String prefix = idPrefixMap + "|";
return datasourceCommunityMap.keySet() return datasourceCommunityMap
.stream() .keySet()
.map(key -> prefix + key) .stream()
.collect(Collectors.toList()); .map(key -> prefix + key)
.collect(Collectors.toList());
} }
private static CommunityEntityMap mapWithRepresentativeOrganization(SparkSession spark, String relationPath private static CommunityEntityMap mapWithRepresentativeOrganization(SparkSession spark, String relationPath,
, CommunityEntityMap organizationCommunityMap) { CommunityEntityMap organizationCommunityMap) {
Dataset<Row> mergesRel = spark.read().schema(Encoders.bean(Relation.class).schema()) Dataset<Row> mergesRel = spark
.json(relationPath) .read()
.filter("datainfo.deletedbyinference != true and relClass = 'merges") .schema(Encoders.bean(Relation.class).schema())
.select("source", "target"); .json(relationPath)
.filter("datainfo.deletedbyinference != true and relClass = 'merges")
.select("source", "target");
List<String> idList = entityIdList(ModelSupport.idPrefixMap.get(Organization.class), organizationCommunityMap); List<String> idList = entityIdList(ModelSupport.idPrefixMap.get(Organization.class), organizationCommunityMap);
Dataset<String> organizationIdentifiers = spark.createDataset(idList, Encoders.STRING()); Dataset<String> organizationIdentifiers = spark.createDataset(idList, Encoders.STRING());
List<Row> mappedKeys = organizationIdentifiers.join(mergesRel, organizationIdentifiers.col("_1").equalTo(mergesRel.col("target")), "left_semi") List<Row> mappedKeys = organizationIdentifiers
.select("source", "target").collectAsList(); .join(mergesRel, organizationIdentifiers.col("_1").equalTo(mergesRel.col("target")), "left_semi")
.select("source", "target")
.collectAsList();
return remapCommunityEntityMap(organizationCommunityMap, mappedKeys); return remapCommunityEntityMap(organizationCommunityMap, mappedKeys);
} }
private static CommunityEntityMap remapCommunityEntityMap(CommunityEntityMap entityCommunityMap, List<Row> mappedKeys) { private static CommunityEntityMap remapCommunityEntityMap(CommunityEntityMap entityCommunityMap,
List<Row> mappedKeys) {
for (Row mappedEntry : mappedKeys) { for (Row mappedEntry : mappedKeys) {
String oldKey = mappedEntry.getAs("target"); String oldKey = mappedEntry.getAs("target");
String newKey = mappedEntry.getAs("source"); String newKey = mappedEntry.getAs("source");
//inserts the newKey in the map while removing the oldKey. The remove produces the value in the Map, which // inserts the newKey in the map while removing the oldKey. The remove produces the value in the Map, which
//will be used as the newValue parameter of the BiFunction // will be used as the newValue parameter of the BiFunction
entityCommunityMap.merge(newKey, entityCommunityMap.remove(oldKey), (existing, newValue) ->{ entityCommunityMap.merge(newKey, entityCommunityMap.remove(oldKey), (existing, newValue) -> {
existing.addAll(newValue); existing.addAll(newValue);
return existing; return existing;
}); });
@ -255,7 +270,6 @@ public class SparkBulkTagJob {
.json(inputPath); .json(inputPath);
} }
private static void extendCommunityConfigurationForEOSC(SparkSession spark, String inputPath, private static void extendCommunityConfigurationForEOSC(SparkSession spark, String inputPath,
CommunityConfiguration cc) { CommunityConfiguration cc) {
@ -293,11 +307,6 @@ public class SparkBulkTagJob {
ProtoMap protoMappingParams, ProtoMap protoMappingParams,
CommunityConfiguration communityConfiguration) { CommunityConfiguration communityConfiguration) {
try {
System.out.println(new ObjectMapper().writeValueAsString(protoMappingParams));
} catch (JsonProcessingException e) {
throw new RuntimeException(e);
}
ModelSupport.entityTypes ModelSupport.entityTypes
.keySet() .keySet()
.parallelStream() .parallelStream()

View File

@ -43,7 +43,7 @@ public class Community implements Serializable {
} }
public void setSubjects(List<String> subjects) { public void setSubjects(List<String> subjects) {
if(subjects != null) if (subjects != null)
this.subjects = subjects; this.subjects = subjects;
} }
@ -60,7 +60,7 @@ public class Community implements Serializable {
} }
public void setZenodoCommunities(List<String> zenodoCommunities) { public void setZenodoCommunities(List<String> zenodoCommunities) {
if(zenodoCommunities!=null) if (zenodoCommunities != null)
this.zenodoCommunities = zenodoCommunities; this.zenodoCommunities = zenodoCommunities;
} }

View File

@ -52,7 +52,7 @@ public class PrepareResultCommunitySet {
log.info("baseURL: {}", baseURL); log.info("baseURL: {}", baseURL);
final CommunityEntityMap organizationMap = Utils.getCommunityOrganization(baseURL); final CommunityEntityMap organizationMap = Utils.getCommunityOrganization(baseURL);
//final CommunityEntityMap organizationMap = Utils.getOrganizationCommunityMap(baseURL); // final CommunityEntityMap organizationMap = Utils.getOrganizationCommunityMap(baseURL);
log.info("organizationMap: {}", new Gson().toJson(organizationMap)); log.info("organizationMap: {}", new Gson().toJson(organizationMap));
SparkConf conf = new SparkConf(); SparkConf conf = new SparkConf();

View File

@ -28,4 +28,7 @@ blacklist=empty
allowedpids=orcid;orcid_pending allowedpids=orcid;orcid_pending
baseURL = https://services.openaire.eu/openaire/community/ baseURL = https://services.openaire.eu/openaire/community/
iterations=1 iterations=1
dbUrl=jdbc:postgresql://beta.services.openaire.eu:5432/dnet_openaireplus
dbUser=dnet
dbPassword=dnetPwd

View File

@ -170,6 +170,18 @@
<name>pathMap</name> <name>pathMap</name>
<value>${pathMap}</value> <value>${pathMap}</value>
</property> </property>
<property>
<name>dbUrl</name>
<value>${dbUrl}</value>
</property>
<property>
<name>dbUser</name>
<value>${dbUser}</value>
</property>
<property>
<name>dbPassword</name>
<value>${dbPassword}</value>
</property>
</configuration> </configuration>
</sub-workflow> </sub-workflow>
<ok to="affiliation_inst_repo" /> <ok to="affiliation_inst_repo" />

View File

@ -17,15 +17,15 @@
<value>undelete</value> <value>undelete</value>
</property> </property>
<property> <property>
<name>dbUrl></name> <name>dbUrl</name>
</property> </property>
<property> <property>
<name>dbUser></name> <name>dbUser</name>
</property> </property>
<property> <property>
<name>dbPassword></name> <name>dbPassword</name>
</property> </property>

View File

@ -8,10 +8,6 @@ import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import java.util.List; import java.util.List;
import com.fasterxml.jackson.core.JsonProcessingException;
import eu.dnetlib.dhp.api.Utils;
import eu.dnetlib.dhp.api.model.SubCommunityModel;
import eu.dnetlib.dhp.bulktag.community.CommunityConfiguration;
import org.apache.commons.io.FileUtils; import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
@ -31,9 +27,13 @@ import org.junit.jupiter.api.Test;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.gson.Gson; import com.google.gson.Gson;
import eu.dnetlib.dhp.api.Utils;
import eu.dnetlib.dhp.api.model.SubCommunityModel;
import eu.dnetlib.dhp.bulktag.community.CommunityConfiguration;
import eu.dnetlib.dhp.bulktag.community.ProtoMap; import eu.dnetlib.dhp.bulktag.community.ProtoMap;
import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.*;
@ -1957,13 +1957,13 @@ public class BulkTagJobTest {
List<SubCommunityModel> subcommunities = Utils.getSubcommunities("clarin", baseURL); List<SubCommunityModel> subcommunities = Utils.getSubcommunities("clarin", baseURL);
CommunityConfiguration tmp = Utils.getCommunityConfiguration(baseURL); CommunityConfiguration tmp = Utils.getCommunityConfiguration(baseURL);
tmp.getCommunities().keySet().forEach(c -> { tmp.getCommunities().keySet().forEach(c -> {
try { try {
System.out.println(new ObjectMapper().writeValueAsString(tmp.getCommunities().get(c))); System.out.println(new ObjectMapper().writeValueAsString(tmp.getCommunities().get(c)));
} catch (JsonProcessingException e) { } catch (JsonProcessingException e) {
throw new RuntimeException(e); throw new RuntimeException(e);
} }
}); });
System.out.println(new ObjectMapper().writeValueAsString(Utils.getOrganizationCommunityMap(baseURL))); System.out.println(new ObjectMapper().writeValueAsString(Utils.getOrganizationCommunityMap(baseURL)));
} }