// Source: dnet-applications/apps/dnet-exporter-api/src/main/java/eu/dnetlib/openaire/community/importer/CommunityImporterService.java
// (repository viewer metadata: 408 lines, 16 KiB, Java)

package eu.dnetlib.openaire.community.importer;

import java.nio.charset.StandardCharsets;
import java.time.LocalDate;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Base64;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.function.Function;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import javax.transaction.Transactional;

import org.apache.commons.lang3.BooleanUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dom4j.DocumentHelper;
import org.dom4j.Node;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.stereotype.Service;

import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.Lists;

import eu.dnetlib.miscutils.functional.hash.Hashing;
import eu.dnetlib.openaire.community.CommunityService;
import eu.dnetlib.openaire.community.model.DbOrganization;
import eu.dnetlib.openaire.community.repository.DbOrganizationRepository;
import eu.dnetlib.openaire.community.utils.CommunityMappingUtils;
import eu.dnetlib.openaire.exporter.exceptions.CommunityException;
import eu.dnetlib.openaire.exporter.model.community.CommunityClaimType;
import eu.dnetlib.openaire.exporter.model.community.CommunityContentprovider;
import eu.dnetlib.openaire.exporter.model.community.CommunityDetails;
import eu.dnetlib.openaire.exporter.model.community.CommunityMembershipType;
import eu.dnetlib.openaire.exporter.model.community.CommunityOrganization;
import eu.dnetlib.openaire.exporter.model.community.CommunityProject;
import eu.dnetlib.openaire.exporter.model.community.CommunityStatus;
import eu.dnetlib.openaire.exporter.model.community.CommunityType;
import eu.dnetlib.openaire.exporter.model.community.SubCommunity;
import eu.dnetlib.openaire.exporter.model.community.selectioncriteria.SelectionCriteria;
import eu.dnetlib.openaire.exporter.model.context.Category;
import eu.dnetlib.openaire.exporter.model.context.Concept;
import eu.dnetlib.openaire.exporter.model.context.Context;
import eu.dnetlib.openaire.exporter.model.context.Param;

/**
 * Converts a {@link Context} (with its categories, concepts and params) into community model
 * objects and stores them through {@link CommunityService}. It also imports the
 * organization-to-community propagation map contained in an XML profile.
 *
 * <p>
 * The bean is only created when the property {@code openaire.exporter.enable.community.import}
 * is set to {@code true}.
 */
@Service
@ConditionalOnProperty(value = "openaire.exporter.enable.community.import", havingValue = "true")
public class CommunityImporterService {

    // common
    public static final String OPENAIRE_ID = "openaireId";
    public static final String PIPE_SEPARATOR = "||";
    public static final String ID_SEPARATOR = "::";
    public static final String CSV_DELIMITER = ",";
    public static final String CLABEL = "label";

    // id suffixes
    public static final String PROJECTS_ID_SUFFIX = ID_SEPARATOR + "projects";
    public static final String CONTENTPROVIDERS_ID_SUFFIX = ID_SEPARATOR + "contentproviders";
    public static final String ZENODOCOMMUNITY_ID_SUFFIX = ID_SEPARATOR + "zenodocommunities";
    public static final String ORGANIZATION_ID_SUFFIX = ID_SEPARATOR + "organizations";

    // community summary
    public static final String CSUMMARY_DESCRIPTION = "description";
    public static final String CSUMMARY_LOGOURL = "logourl";
    public static final String CSUMMARY_STATUS = "status";
    public static final String CSUMMARY_NAME = "name";
    public static final String CSUMMARY_MANAGER = "manager";
    public static final String CSUMMARY_ZENODOC = "zenodoCommunity";

    // community profile
    public static final String CPROFILE_SUBJECT = "subject";
    public static final String CPROFILE_CREATIONDATE = "creationdate";
    public static final String CPROFILE_FOS = "fos";
    public static final String CPROFILE_SDG = "sdg";
    public static final String CPROFILE_ADVANCED_CONSTRAINT = "advancedConstraints";
    public static final String CPROFILE_REMOVE_CONSTRAINT = "removeConstraints";
    public static final String CPROFILE_SUGGESTED_ACKNOWLEDGEMENT = "suggestedAcknowledgement";

    // community project
    public static final String CPROJECT_FUNDER = "funder";
    public static final String CPROJECT_NUMBER = "CD_PROJECT_NUMBER";
    public static final String CPROJECT_FULLNAME = "projectfullname";
    public static final String CPROJECT_ACRONYM = "acronym";

    // community content provider
    public static final String CCONTENTPROVIDER_NAME = "name";
    public static final String CCONTENTPROVIDER_OFFICIALNAME = "officialname";
    public static final String CCONTENTPROVIDER_ENABLED = "enabled";
    public static final String CCONTENTPROVIDERENABLED_DEFAULT = "true";
    public static final String CCONTENTPROVIDER_SELCRITERIA = "selcriteria";

    // community zenodo community
    public static final String CZENODOCOMMUNITY_ID = "zenodoid";

    // community organization
    public static final String CORGANIZATION_NAME = "name";
    public static final String CORGANIZATION_LOGOURL = "logourl";
    public static final String CORGANIZATION_WEBSITEURL = "websiteurl";

    /** XPath of the profile parameter holding the JSON map "organization id -> list of communities". */
    private static final String PROPAGATION_MAP_XPATH =
            "//NODE[@name='setPropagationOrganizationCommunityMap']//PARAM[@name='parameterValue']";

    /** Identifiers may carry a prefix terminated by this character; only the part after it is kept. */
    private static final String ID_PREFIX_SEPARATOR = "|";

    @Autowired
    private DbOrganizationRepository dbOrganizationRepository;

    @Autowired
    private CommunityService service;

    @Autowired
    private JdbcTemplate jdbcTemplate;

    private static final Log log = LogFactory.getLog(CommunityImporterService.class);

    /**
     * Extracts the organization/community propagation map from an XML profile and, unless running
     * in simulation mode, saves one {@link DbOrganization} per (community, organization) pair.
     *
     * <p>
     * Saving is best-effort: a failure on a single organization is logged and the remaining ones
     * are still processed.
     *
     * @param xml
     *            the XML profile containing the {@code setPropagationOrganizationCommunityMap} node
     * @param simulation
     *            when {@code true} nothing is saved, the computed list is only returned
     * @return all the (community, organization) pairs found in the profile, saved or not
     * @throws Exception
     *             if the XML or the embedded JSON cannot be parsed, or the parameter node is missing
     */
    public List<DbOrganization> importPropagationOrganizationsFromProfile(final String xml, final boolean simulation) throws Exception {
        final Node node = DocumentHelper.parseText(xml).selectSingleNode(PROPAGATION_MAP_XPATH);
        if (node == null) {
            // Fail with an explicit message instead of a bare NullPointerException on getText()
            throw new IllegalArgumentException("Missing parameter in the profile, xpath: " + PROPAGATION_MAP_XPATH);
        }

        final Map<String, List<String>> communitiesByOrg =
                new ObjectMapper().readValue(node.getText(), new TypeReference<Map<String, List<String>>>() {});

        final List<DbOrganization> list = new ArrayList<>();
        for (final Map.Entry<String, List<String>> e : communitiesByOrg.entrySet()) {
            if (e.getValue() == null) {
                continue; // an organization mapped to JSON null has no communities
            }
            final String orgId = stripIdPrefix(e.getKey());
            for (final String community : e.getValue()) {
                list.add(new DbOrganization(community, orgId));
            }
        }

        if (!simulation) {
            for (final DbOrganization o : list) {
                try {
                    dbOrganizationRepository.save(o);
                } catch (final Exception e) {
                    // best-effort: keep going, but do not lose the cause of the failure
                    log.error("ERROR saving org: " + o, e);
                }
            }
        }
        return list;
    }

    /**
     * Imports a community described by a {@link Context}: its details, projects, content
     * providers, organizations, additional Zenodo communities and sub-communities.
     *
     * <p>
     * Content providers and projects for which no Openaire ID is available (and none can be
     * derived) are skipped with a warning.
     *
     * @param context
     *            the context to import, must not be {@code null}
     * @throws RuntimeException
     *             wrapping any failure raised while converting or saving the community
     */
    @Transactional
    public void importCommunity(final Context context) {
        Objects.requireNonNull(context, "context");
        final String communityId = context.getId();

        try {
            final CommunityDetails community = asCommunityDetails(context);

            final List<CommunityContentprovider> datasources = new ArrayList<>();
            for (final CommunityContentprovider ds : getCommunityInfo(context, CONTENTPROVIDERS_ID_SUFFIX,
                    c -> asCommunityDataprovider(communityId, c))) {
                if (ds.getOpenaireId() == null) {
                    log.warn("Openaire ID is missing, organization: " + ds.getOfficialname());
                } else {
                    ds.setOpenaireId(stripIdPrefix(ds.getOpenaireId()));
                    datasources.add(ds);
                }
            }

            final List<CommunityProject> projects = new ArrayList<>();
            for (final CommunityProject p : getCommunityInfo(context, PROJECTS_ID_SUFFIX, c -> asCommunityProject(communityId, c))) {
                resolveOpenaireId(p);
                if (p.getOpenaireId() != null) {
                    projects.add(p);
                }
            }

            final List<CommunityOrganization> orgs =
                    getCommunityInfo(context, ORGANIZATION_ID_SUFFIX, c -> asCommunityOrganization(communityId, c));

            final List<String> otherZenodoCommunities =
                    getCommunityInfo(context, ZENODOCOMMUNITY_ID_SUFFIX, c -> asZenodoCommunity(c));
            community.setOtherZenodoCommunities(otherZenodoCommunities);

            // Every category that is not one of the four well-known ones describes sub-communities
            final List<String> reservedCategories = Arrays.asList(
                    communityId + CONTENTPROVIDERS_ID_SUFFIX,
                    communityId + PROJECTS_ID_SUFFIX,
                    communityId + ORGANIZATION_ID_SUFFIX,
                    communityId + ZENODOCOMMUNITY_ID_SUFFIX);

            final List<SubCommunity> subs = context.getCategories()
                    .entrySet()
                    .stream()
                    .filter(e -> !reservedCategories.contains(e.getKey()))
                    .map(e -> e.getValue())
                    .flatMap(cat -> asSubCommunities(communityId, null, cat.getLabel(), cat.getConcepts()).stream())
                    .collect(Collectors.toList());

            service.saveCommunity(community);
            service.addCommunityProjects(communityId, projects.toArray(new CommunityProject[0]));
            service.addCommunityContentProviders(communityId, datasources.toArray(new CommunityContentprovider[0]));
            service.addCommunityOrganizations(communityId, orgs.toArray(new CommunityOrganization[0]));
            service.addSubCommunities(communityId, subs.toArray(new SubCommunity[0]));
        } catch (final Exception e) {
            throw new RuntimeException("Error importing community: " + communityId, e);
        }
    }

    /**
     * Makes sure the project has a usable Openaire ID: an existing id is stripped of its prefix,
     * a missing one is derived from funder and grant id for the funders EC, NSF and NIH.
     * When nothing can be derived the id is left {@code null} and a warning is logged.
     */
    private void resolveOpenaireId(final CommunityProject p) {
        final String currentId = p.getOpenaireId();
        if (currentId != null) {
            p.setOpenaireId(stripIdPrefix(currentId));
            return;
        }

        // Constant-first comparisons: the funder param may be absent (null)
        final String funder = p.getFunder();
        final boolean ec = "EC".equalsIgnoreCase(funder);
        final boolean nsf = "NSF".equalsIgnoreCase(funder);
        final boolean nih = "NIH".equalsIgnoreCase(funder);

        if (!ec && !nsf && !nih) {
            log.warn("Openaire ID is missing, funder: " + funder);
            return;
        }

        final String grantId = p.getGrantId();
        if (StringUtils.isBlank(grantId)) {
            // An id hashed from an empty/missing grant id would not identify any project
            log.warn("Openaire ID and grant ID are missing, funder: " + funder);
            return;
        }

        if (ec) {
            final String ns = findNamespaceForECProject(grantId);
            if (ns != null) {
                p.setOpenaireId(ns + ID_SEPARATOR + Hashing.md5(grantId));
            } else {
                log.warn("EC project not in the db: " + grantId);
            }
        } else if (nsf) {
            p.setOpenaireId("nsf_________::" + Hashing.md5(grantId));
        } else {
            p.setOpenaireId("nih_________::" + Hashing.md5(grantId));
        }
    }

    /**
     * Maps the concepts of the category {@code <context id> + idSuffix} with the given function.
     *
     * @return the mapped concepts, or an empty list when the context or the category is missing
     */
    private <R> List<R> getCommunityInfo(final Context context, final String idSuffix, final Function<Concept, R> mapping)
            throws CommunityException {
        if (context == null) {
            return Lists.newArrayList();
        }
        final Map<String, Category> categories = context.getCategories();
        final Category category = categories.get(context.getId() + idSuffix);
        if (category == null) {
            return Lists.newArrayList();
        }
        return category.getConcepts()
                .stream()
                .map(mapping)
                .collect(Collectors.toList());
    }

    /** Builds the community details from the context attributes and its params. */
    private static CommunityDetails asCommunityDetails(final Context c) {
        final List<Param> params = c.getParams();

        final CommunityDetails details = new CommunityDetails();
        details.setId(c.getId());
        details.setShortName(c.getLabel());
        details.setLastUpdateDate(CommunityMappingUtils.asLocalDateTime(c.getLastUpdateDate()));
        details.setCreationDate(CommunityMappingUtils.asLocalDateTime(c.getCreationDate()));
        details.setQueryId(c.getId() + PIPE_SEPARATOR + c.getLabel());
        details.setType(CommunityType.valueOf(c.getType()));
        details.setMembership(CommunityMembershipType.open);
        details.setClaim(CommunityClaimType.all);
        details.setDescription(asCsv(CSUMMARY_DESCRIPTION, params));
        details.setLogoUrl(asCsv(CSUMMARY_LOGOURL, params));

        // A community without an explicit status is imported as hidden
        final String status = firstValue(CSUMMARY_STATUS, params);
        if (StringUtils.isNotBlank(status)) {
            details.setStatus(CommunityStatus.valueOf(status));
        } else {
            details.setStatus(CommunityStatus.hidden);
        }

        details.setName(StringUtils.firstNonBlank(asCsv(CSUMMARY_NAME, params), c.getLabel()));
        details.setZenodoCommunity(asCsv(CSUMMARY_ZENODOC, params));
        details.setSubjects(splitValues(asValues(CPROFILE_SUBJECT, params), CSV_DELIMITER));
        details.setFos(splitValues(asValues(CPROFILE_FOS, params), CSV_DELIMITER));
        details.setSdg(splitValues(asValues(CPROFILE_SDG, params), CSV_DELIMITER));
        // In the map the string is the serialization of the json representing the selection criteria so it is a valid json
        details.setAdvancedConstraints(SelectionCriteria.fromJson(asCsv(CPROFILE_ADVANCED_CONSTRAINT, params)));
        // In the map the string is the serialization of the json representing the selection criteria so it is a valid json
        details.setRemoveConstraints(SelectionCriteria.fromJson(asCsv(CPROFILE_REMOVE_CONSTRAINT, params)));
        details.setSuggestedAcknowledgements(splitValues(asValues(CPROFILE_SUGGESTED_ACKNOWLEDGEMENT, params), CSV_DELIMITER));
        details.setPlan(null);

        // The 'creationdate' param, when present, overrides the creation date of the context.
        // It is skipped when absent so that the date set above is never replaced by an empty value.
        final String creationDate = asCsv(CPROFILE_CREATIONDATE, params);
        if (StringUtils.isNotBlank(creationDate)) {
            try {
                details.setCreationDate(CommunityMappingUtils.asLocalDateTime(creationDate));
            } catch (final Exception e) {
                log.debug("Exception on date format: " + e.getMessage());
            }
        }

        return details;
    }

    /** Builds a community project from the params of a concept. */
    private static CommunityProject asCommunityProject(final String communityId, final Concept c) {
        final List<Param> p = c.getParams();
        final CommunityProject project = new CommunityProject();
        project.setCommunityId(communityId);
        project.setOpenaireId(firstValue(OPENAIRE_ID, p));
        project.setFunder(firstValue(CPROJECT_FUNDER, p));
        project.setGrantId(firstValue(CPROJECT_NUMBER, p));
        project.setName(firstValue(CPROJECT_FULLNAME, p));
        project.setAcronym(firstValue(CPROJECT_ACRONYM, p));
        project.setAvailableSince(LocalDate.of(2017, 2, 25)); // Birillo Birth Date
        return project;
    }

    /** Builds a community content provider from the params of a concept. */
    private static CommunityContentprovider asCommunityDataprovider(final String communityId, final Concept c) {
        final List<Param> p = c.getParams();
        final CommunityContentprovider d = new CommunityContentprovider();
        d.setCommunityId(communityId);
        d.setOpenaireId(firstValue(OPENAIRE_ID, p));
        d.setName(firstValue(CCONTENTPROVIDER_NAME, p));
        d.setOfficialname(firstValue(CCONTENTPROVIDER_OFFICIALNAME, p));
        // NOTE(review): a missing 'enabled' param yields false here, while the (unused) constant
        // CCONTENTPROVIDERENABLED_DEFAULT is "true" - confirm which default is intended.
        d.setEnabled(BooleanUtils.toBoolean(firstValue(CCONTENTPROVIDER_ENABLED, p)));
        d.setSelectioncriteria(SelectionCriteria.fromJson(firstValue(CCONTENTPROVIDER_SELCRITERIA, p)));
        return d;
    }

    /** Builds a community organization from the params of a concept; urls may be base64 encoded. */
    private static CommunityOrganization asCommunityOrganization(final String id, final Concept c) {
        final List<Param> p = c.getParams();
        final CommunityOrganization o = new CommunityOrganization();
        o.setCommunityId(id);
        o.setName(firstValue(CORGANIZATION_NAME, p));
        o.setLogo_url(getDecodedUrl(firstValue(CORGANIZATION_LOGOURL, p)));
        o.setWebsite_url(getDecodedUrl(firstValue(CORGANIZATION_WEBSITEURL, p)));
        return o;
    }

    /** Returns the Zenodo community id stored in the params of a concept, or {@code null}. */
    private static String asZenodoCommunity(final Concept c) {
        return firstValue(CZENODOCOMMUNITY_ID, c.getParams());
    }

    /**
     * Flattens a tree of concepts into a list of sub-communities (each concept is followed by its
     * descendants). The parent of a nested concept is the id of the enclosing concept.
     *
     * @param parent
     *            id of the parent sub-community, {@code null} for the concepts of a category
     * @param concepts
     *            the concepts to convert; {@code null} is treated as an empty list
     */
    private static List<SubCommunity> asSubCommunities(final String communityId, final String parent, final String category, final List<Concept> concepts) {
        final List<SubCommunity> list = new ArrayList<>();
        if (concepts == null) {
            return list;
        }
        for (final Concept c : concepts) {
            final SubCommunity sc = new SubCommunity();
            sc.setSubCommunityId(c.getId());
            sc.setCommunityId(communityId);
            sc.setParent(parent);
            sc.setCategory(category);
            sc.setLabel(c.getLabel());
            sc.setParams(c.getParams());
            sc.setClaim(c.isClaim());
            sc.setBrowsable(false);
            list.add(sc);
            list.addAll(asSubCommunities(communityId, c.getId(), category, c.getConcepts()));
        }
        return list;
    }

    /**
     * Looks up the first 12 characters of the id of the 'corda' project having the given code.
     *
     * @return the namespace prefix, or {@code null} when no such project is in the db
     */
    private String findNamespaceForECProject(final String code) {
        final List<String> list =
                jdbcTemplate.queryForList("SELECT substr(id, 1, 12) from projects where code = ? and id like 'corda%'", String.class, code);
        return list.isEmpty() ? null : list.get(0);
    }

    /** Returns the part of the id after the first '|', or the id itself when it has no '|'. */
    private static String stripIdPrefix(final String id) {
        return id.contains(ID_PREFIX_SEPARATOR) ? StringUtils.substringAfter(id, ID_PREFIX_SEPARATOR) : id;
    }

    /**
     * Decodes a base64 encoded url. Values that are {@code null}, already start with "http" or
     * are not valid base64 are returned unchanged.
     */
    private static String getDecodedUrl(final String encodedUrl) {
        if (encodedUrl == null || encodedUrl.startsWith("http")) {
            return encodedUrl;
        }
        try {
            // Explicit charset: the result must not depend on the platform default
            return new String(Base64.getDecoder().decode(encodedUrl), StandardCharsets.UTF_8);
        } catch (final IllegalArgumentException e) {
            log.warn("Invalid base64: " + encodedUrl);
            return encodedUrl;
        }
    }

    /** Splits every value on the (literal) separator, dropping blank tokens and trimming the others. */
    private static List<String> splitValues(final Stream<String> stream, final String separator) {
        // The separator is a literal, not a regular expression
        final String regex = Pattern.quote(separator);
        return stream
                .flatMap(s -> Arrays.stream(s.split(regex)))
                .filter(StringUtils::isNotBlank)
                .map(StringUtils::trim)
                .collect(Collectors.toList());
    }

    /** Returns the first value of the params with the given name, or {@code null} if there is none. */
    private static String firstValue(final String name, final List<Param> params) {
        return asValues(name, params).findFirst().orElse(null);
    }

    /** Returns the distinct values of the params with the given name joined by a comma ("" if none). */
    private static String asCsv(final String name, final List<Param> params) {
        return asValues(name, params).collect(Collectors.joining(CSV_DELIMITER));
    }

    /**
     * Streams the distinct, trimmed, non-null values of the params having the given name.
     * Null values are dropped because {@code Stream.findFirst()} rejects a null element and
     * {@code Collectors.joining} would render it as the text "null".
     */
    private static Stream<String> asValues(final String name, final List<Param> params) {
        if (params == null) {
            return Stream.empty();
        }
        final String wanted = name.trim();
        return params.stream()
                .filter(Objects::nonNull)
                .filter(p -> StringUtils.isNotBlank(p.getName()))
                .filter(p -> p.getName().trim().equals(wanted))
                .map(Param::getValue)
                .filter(Objects::nonNull)
                .map(StringUtils::trim)
                .distinct();
    }

    /** @return the repository used to save the propagation organizations */
    protected DbOrganizationRepository getDbOrganizationRepository() {
        return dbOrganizationRepository;
    }

    /** @param dbOrganizationRepository the repository used to save the propagation organizations */
    protected void setDbOrganizationRepository(final DbOrganizationRepository dbOrganizationRepository) {
        this.dbOrganizationRepository = dbOrganizationRepository;
    }

    /** @return the community service that receives the imported data */
    protected CommunityService getService() {
        return service;
    }

    /** @param service the community service that receives the imported data */
    protected void setService(final CommunityService service) {
        this.service = service;
    }

    /** @return the jdbc template used to look up the EC projects */
    protected JdbcTemplate getJdbcTemplate() {
        return jdbcTemplate;
    }

    /** @param jdbcTemplate the jdbc template used to look up the EC projects */
    protected void setJdbcTemplate(final JdbcTemplate jdbcTemplate) {
        this.jdbcTemplate = jdbcTemplate;
    }
}