package eu.dnetlib.openaire.community.importer; import java.time.LocalDate; import java.util.ArrayList; import java.util.Arrays; import java.util.Base64; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.Stream; import javax.transaction.Transactional; import org.apache.commons.lang3.BooleanUtils; import org.apache.commons.lang3.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.dom4j.DocumentHelper; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; import org.springframework.jdbc.core.JdbcTemplate; import org.springframework.stereotype.Service; import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.Lists; import eu.dnetlib.miscutils.functional.hash.Hashing; import eu.dnetlib.openaire.community.CommunityService; import eu.dnetlib.openaire.community.model.DbOrganization; import eu.dnetlib.openaire.community.repository.DbOrganizationRepository; import eu.dnetlib.openaire.community.utils.CommunityMappingUtils; import eu.dnetlib.openaire.exporter.exceptions.CommunityException; import eu.dnetlib.openaire.exporter.model.community.CommunityClaimType; import eu.dnetlib.openaire.exporter.model.community.CommunityContentprovider; import eu.dnetlib.openaire.exporter.model.community.CommunityDetails; import eu.dnetlib.openaire.exporter.model.community.CommunityMembershipType; import eu.dnetlib.openaire.exporter.model.community.CommunityOrganization; import eu.dnetlib.openaire.exporter.model.community.CommunityProject; import eu.dnetlib.openaire.exporter.model.community.CommunityStatus; import eu.dnetlib.openaire.exporter.model.community.CommunityType; import eu.dnetlib.openaire.exporter.model.community.SubCommunity; import eu.dnetlib.openaire.exporter.model.community.selectioncriteria.SelectionCriteria; import eu.dnetlib.openaire.exporter.model.context.Category; import eu.dnetlib.openaire.exporter.model.context.Concept; import eu.dnetlib.openaire.exporter.model.context.Context; import eu.dnetlib.openaire.exporter.model.context.Param; @Service @ConditionalOnProperty(value = "openaire.exporter.enable.community.import", havingValue = "true") public class CommunityImporterService { // common public final static String OPENAIRE_ID = "openaireId"; public final static String PIPE_SEPARATOR = "||"; public final static String ID_SEPARATOR = "::"; public final static String CSV_DELIMITER = ","; public final static String CLABEL = "label"; // id suffixes public final static String PROJECTS_ID_SUFFIX = ID_SEPARATOR + "projects"; public final static String CONTENTPROVIDERS_ID_SUFFIX = ID_SEPARATOR + "contentproviders"; public final static String ZENODOCOMMUNITY_ID_SUFFIX = ID_SEPARATOR + "zenodocommunities"; public final static String ORGANIZATION_ID_SUFFIX = ID_SEPARATOR + "organizations"; // community summary public final static String CSUMMARY_DESCRIPTION = "description"; public final static String CSUMMARY_LOGOURL = "logourl"; public final static String CSUMMARY_STATUS = "status"; public final static String CSUMMARY_NAME = "name"; public final static String CSUMMARY_MANAGER = "manager"; public final static String CSUMMARY_ZENODOC = "zenodoCommunity"; // community profile public final static String CPROFILE_SUBJECT = "subject"; public final static String CPROFILE_CREATIONDATE = "creationdate"; public final static String CPROFILE_FOS = "fos"; public final static String CPROFILE_SDG = "sdg"; public final static String CPROFILE_ADVANCED_CONSTRAINT = "advancedConstraints"; public final static String CPROFILE_REMOVE_CONSTRAINT = "removeConstraints"; public final static String CPROFILE_SUGGESTED_ACKNOWLEDGEMENT = "suggestedAcknowledgement"; // community project public final static String CPROJECT_FUNDER = "funder"; public final static String CPROJECT_NUMBER = "CD_PROJECT_NUMBER"; public final static String CPROJECT_FULLNAME = "projectfullname"; public final static String CPROJECT_ACRONYM = "acronym"; // community content provider public final static String CCONTENTPROVIDER_NAME = "name"; public final static String CCONTENTPROVIDER_OFFICIALNAME = "officialname"; public final static String CCONTENTPROVIDER_ENABLED = "enabled"; public final static String CCONTENTPROVIDERENABLED_DEFAULT = "true"; public final static String CCONTENTPROVIDER_SELCRITERIA = "selcriteria"; // community zenodo community public final static String CZENODOCOMMUNITY_ID = "zenodoid"; // community organization public final static String CORGANIZATION_NAME = "name"; public final static String CORGANIZATION_LOGOURL = "logourl"; public final static String CORGANIZATION_WEBSITEURL = "websiteurl"; @Autowired private DbOrganizationRepository dbOrganizationRepository; @Autowired private CommunityService service; @Autowired private JdbcTemplate jdbcTemplate; private static final Log log = LogFactory.getLog(CommunityImporterService.class); public List importPropagationOrganizationsFromProfile(final String xml, final boolean simulation) throws Exception { final String json = DocumentHelper.parseText(xml) .selectSingleNode("//NODE[@name='setPropagationOrganizationCommunityMap']//PARAM[@name='parameterValue']") .getText(); final List list = new ObjectMapper() .readValue(json, new TypeReference>>() {}) .entrySet() .stream() .flatMap(e -> e.getValue() .stream() .map(community -> { if (e.getKey().contains("|")) { return new DbOrganization(community, StringUtils.substringAfter(e.getKey(), "|")); } return new DbOrganization(community, e.getKey()); })) .collect(Collectors.toList()); if (!simulation) { list.forEach(o -> { try { dbOrganizationRepository.save(o); } catch (final Throwable e) { log.error("ERROR saving org: " + o); } }); } return list; } @Transactional public void importCommunity(final Context context) { try { final CommunityDetails community = asCommunityDetails(context); final List datasources = getCommunityInfo(context, CONTENTPROVIDERS_ID_SUFFIX, c -> asCommunityDataprovider(context.getId(), c)) .stream() .map(o -> { if (o.getOpenaireId() == null) { log.warn("Openaire ID is missing, organization: " + o.getOfficialname()); } else if (o.getOpenaireId().contains("|")) { o.setOpenaireId(StringUtils.substringAfter(o.getOpenaireId(), "|")); } return o; }) .filter(o -> o.getOpenaireId() != null) .collect(Collectors.toList()); final List projects = getCommunityInfo(context, PROJECTS_ID_SUFFIX, c -> asCommunityProject(context.getId(), c)) .stream() .map(p -> { if (p.getOpenaireId() == null) { if ("EC".equalsIgnoreCase(p.getFunder())) { final String ns = findNamespaceForECProject(p.getGrantId()); if (ns != null) { p.setOpenaireId(ns + "::" + Hashing.md5(p.getGrantId())); } else { log.warn("EC project not in the db: " + p.getGrantId()); } } else if ("NSF".equalsIgnoreCase(p.getFunder())) { p.setOpenaireId("nsf_________::" + Hashing.md5(p.getGrantId())); } else if ("NIH".equalsIgnoreCase(p.getFunder())) { p.setOpenaireId("nih_________::" + Hashing.md5(p.getGrantId())); } else { log.warn("Openaire ID is missing, funder: " + p.getFunder()); } } else if (p.getOpenaireId().contains("|")) { p.setOpenaireId(StringUtils.substringAfter(p.getOpenaireId(), "|")); } return p; }) .filter(p -> p.getOpenaireId() != null) .collect(Collectors.toList()); final List orgs = getCommunityInfo(context, ORGANIZATION_ID_SUFFIX, c -> asCommunityOrganization(context.getId(), c)); final List otherZenodoCommunities = getCommunityInfo(context, ZENODOCOMMUNITY_ID_SUFFIX, CommunityImporterService::asZenodoCommunity); community.setOtherZenodoCommunities(otherZenodoCommunities); final List subs = context.getCategories() .entrySet() .stream() .filter(e -> !(context.getId() + CONTENTPROVIDERS_ID_SUFFIX).equals(e.getKey())) .filter(e -> !(context.getId() + PROJECTS_ID_SUFFIX).equals(e.getKey())) .filter(e -> !(context.getId() + ORGANIZATION_ID_SUFFIX).equals(e.getKey())) .filter(e -> !(context.getId() + ZENODOCOMMUNITY_ID_SUFFIX).equals(e.getKey())) .map(Entry::getValue) .map(cat -> asSubCommunities(context.getId(), null, cat.getLabel(), cat.getConcepts())) .flatMap(List::stream) .collect(Collectors.toList()); service.saveCommunity(community); service.addCommunityProjects(context.getId(), projects.toArray(new CommunityProject[projects.size()])); service.addCommunityDatasources(context.getId(), datasources.toArray(new CommunityContentprovider[datasources.size()])); service.addCommunityOrganizations(context.getId(), orgs.toArray(new CommunityOrganization[orgs.size()])); service.addSubCommunities(context.getId(), subs.toArray(new SubCommunity[subs.size()])); } catch ( final Exception e) { throw new RuntimeException("Error importing community: " + context.getId(), e); } } private List getCommunityInfo(final Context context, final String idSuffix, final Function mapping) throws CommunityException { if (context != null) { final Map categories = context.getCategories(); final Category category = categories.get(context.getId() + idSuffix); if (category != null) { return category.getConcepts() .stream() .map(mapping) .collect(Collectors.toList()); } } return Lists.newArrayList(); } private static CommunityDetails asCommunityDetails(final Context c) { final CommunityDetails details = new CommunityDetails(); details.setId(c.getId()); details.setShortName(c.getLabel()); details.setLastUpdateDate(CommunityMappingUtils.asLocalDateTime(c.getLastUpdateDate())); details.setCreationDate(CommunityMappingUtils.asLocalDateTime(c.getCreationDate())); details.setQueryId(c.getId() + PIPE_SEPARATOR + c.getLabel()); details.setType(CommunityType.valueOf(c.getType())); details.setMembership(CommunityMembershipType.open); details.setClaim(CommunityClaimType.all); details.setDescription(asCsv(CSUMMARY_DESCRIPTION, c.getParams())); details.setLogoUrl(asCsv(CSUMMARY_LOGOURL, c.getParams())); final String status = firstValue(CSUMMARY_STATUS, c.getParams()); if (StringUtils.isNotBlank(status)) { details.setStatus(CommunityStatus.valueOf(status)); } else { details.setStatus(CommunityStatus.hidden); } details.setName(StringUtils.firstNonBlank(asCsv(CSUMMARY_NAME, c.getParams()), c.getLabel())); details.setZenodoCommunity(asCsv(CSUMMARY_ZENODOC, c.getParams())); details.setSubjects(splitValues(asValues(CPROFILE_SUBJECT, c.getParams()), CSV_DELIMITER)); details.setFos(splitValues(asValues(CPROFILE_FOS, c.getParams()), CSV_DELIMITER)); details.setSdg(splitValues(asValues(CPROFILE_SDG, c.getParams()), CSV_DELIMITER)); // In the map the string is the serialization of the json representing the selection criteria so it is a valid json details.setAdvancedConstraints(SelectionCriteria.fromJson(asCsv(CPROFILE_ADVANCED_CONSTRAINT, c.getParams()))); // In the map the string is the serialization of the json representing the selection criteria so it is a valid json details.setRemoveConstraints(SelectionCriteria.fromJson(asCsv(CPROFILE_REMOVE_CONSTRAINT, c.getParams()))); details.setSuggestedAcknowledgements(splitValues(asValues(CPROFILE_SUGGESTED_ACKNOWLEDGEMENT, c.getParams()), CSV_DELIMITER)); details.setPlan(null); try { details.setCreationDate(CommunityMappingUtils.asLocalDateTime(asCsv(CPROFILE_CREATIONDATE, c.getParams()))); } catch (final Exception e) { log.debug("Exception on date format: " + e.getMessage()); } return details; } private static CommunityProject asCommunityProject(final String communityId, final Concept c) { final List p = c.getParams(); final CommunityProject project = new CommunityProject(); project.setCommunityId(communityId); project.setOpenaireId(firstValue(OPENAIRE_ID, p)); project.setFunder(firstValue(CPROJECT_FUNDER, p)); project.setGrantId(firstValue(CPROJECT_NUMBER, p)); project.setName(firstValue(CPROJECT_FULLNAME, p)); project.setAcronym(firstValue(CPROJECT_ACRONYM, p)); project.setAvailableSince(LocalDate.of(2017, 2, 25)); // Birillo Birth Date return project; } private static CommunityContentprovider asCommunityDataprovider(final String communityId, final Concept c) { final List p = c.getParams(); final CommunityContentprovider d = new CommunityContentprovider(); d.setCommunityId(communityId); d.setOpenaireId(firstValue(OPENAIRE_ID, p)); d.setName(firstValue(CCONTENTPROVIDER_NAME, p)); d.setOfficialname(firstValue(CCONTENTPROVIDER_OFFICIALNAME, p)); d.setEnabled(BooleanUtils.toBoolean(firstValue(CCONTENTPROVIDER_ENABLED, p))); d.setSelectioncriteria(SelectionCriteria.fromJson(firstValue(CCONTENTPROVIDER_SELCRITERIA, p))); d.setDeposit(false); d.setMessage(null); return d; } private static CommunityOrganization asCommunityOrganization(final String id, final Concept c) { final List p = c.getParams(); final CommunityOrganization o = new CommunityOrganization(); o.setCommunityId(id); o.setName(firstValue(CORGANIZATION_NAME, p)); o.setLogo_url(getDecodedUrl(firstValue(CORGANIZATION_LOGOURL, p))); o.setWebsite_url(getDecodedUrl(firstValue(CORGANIZATION_WEBSITEURL, p))); return o; } private static String asZenodoCommunity(final Concept c) { return firstValue(CZENODOCOMMUNITY_ID, c.getParams()); } private static List asSubCommunities(final String communityId, final String parent, final String category, final List concepts) { final List list = new ArrayList<>(); for (final Concept c : concepts) { final SubCommunity sc = new SubCommunity(); sc.setSubCommunityId(c.getId()); sc.setCommunityId(communityId); sc.setParent(parent); sc.setCategory(category); sc.setLabel(c.getLabel()); sc.setParams(c.getParams()); sc.setClaim(c.isClaim()); sc.setBrowsable(false); list.add(sc); list.addAll(asSubCommunities(communityId, c.getId(), category, c.getConcepts())); } return list; } private String findNamespaceForECProject(final String code) { final List list = jdbcTemplate.queryForList("SELECT substr(id, 1, 12) from projects where code = ? and id like 'corda%'", String.class, code); return list.isEmpty() ? null : list.get(0); } private static String getDecodedUrl(final String encoded_url) { if (encoded_url == null || encoded_url.startsWith("http")) { return encoded_url; } try { return new String(Base64.getDecoder().decode(encoded_url)); } catch (final Exception e) { log.warn("Invalid base64: " + encoded_url); return encoded_url; } } private static List splitValues(final Stream stream, final String separator) { return stream.map(s -> s.split(separator)) .map(Arrays::asList) .flatMap(List::stream) .filter(StringUtils::isNotBlank) .map(StringUtils::trim) .collect(Collectors.toList()); } private static String firstValue(final String name, final List params) { return asValues(name, params).findFirst().orElse(null); } private static String asCsv(final String name, final List params) { return asValues(name, params).collect(Collectors.joining(CSV_DELIMITER)); } private static Stream asValues(final String name, final List params) { return params == null ? Stream.empty() : params.stream() .filter(p -> p != null) .filter(p -> StringUtils.isNotBlank(p.getName())) .filter(p -> p.getName().trim().equals(name.trim())) .map(Param::getValue) .map(StringUtils::trim) .distinct(); } protected DbOrganizationRepository getDbOrganizationRepository() { return dbOrganizationRepository; } protected void setDbOrganizationRepository(final DbOrganizationRepository dbOrganizationRepository) { this.dbOrganizationRepository = dbOrganizationRepository; } protected CommunityService getService() { return service; } protected void setService(final CommunityService service) { this.service = service; } protected JdbcTemplate getJdbcTemplate() { return jdbcTemplate; } protected void setJdbcTemplate(final JdbcTemplate jdbcTemplate) { this.jdbcTemplate = jdbcTemplate; } }