package eu.dnetlib.openaire.community.importer; import java.util.ArrayList; import java.util.Arrays; import java.util.Base64; import java.util.List; import java.util.Map; import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.Stream; import javax.transaction.Transactional; import org.apache.commons.lang3.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.dom4j.DocumentHelper; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.jdbc.core.JdbcTemplate; import org.springframework.stereotype.Service; import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.Lists; import eu.dnetlib.miscutils.functional.hash.Hashing; import eu.dnetlib.openaire.community.CommunityService; import eu.dnetlib.openaire.community.model.DbOrganization; import eu.dnetlib.openaire.community.repository.DbOrganizationRepository; import eu.dnetlib.openaire.community.utils.CommunityMappingUtils; import eu.dnetlib.openaire.exporter.exceptions.CommunityException; import eu.dnetlib.openaire.exporter.model.community.CommunityClaimType; import eu.dnetlib.openaire.exporter.model.community.CommunityContentprovider; import eu.dnetlib.openaire.exporter.model.community.CommunityDetails; import eu.dnetlib.openaire.exporter.model.community.CommunityMembershipType; import eu.dnetlib.openaire.exporter.model.community.CommunityOrganization; import eu.dnetlib.openaire.exporter.model.community.CommunityProject; import eu.dnetlib.openaire.exporter.model.community.CommunityStatus; import eu.dnetlib.openaire.exporter.model.community.CommunityType; import eu.dnetlib.openaire.exporter.model.community.SubCommunity; import eu.dnetlib.openaire.exporter.model.community.selectioncriteria.SelectionCriteria; import eu.dnetlib.openaire.exporter.model.context.Category; import eu.dnetlib.openaire.exporter.model.context.Concept; import eu.dnetlib.openaire.exporter.model.context.Context; import eu.dnetlib.openaire.exporter.model.context.Param; @Service public class CommunityImporterService { // common private final static String OPENAIRE_ID = "openaireId"; private final static String PIPE_SEPARATOR = "||"; private final static String ID_SEPARATOR = "::"; private final static String CSV_DELIMITER = ","; private final static String CLABEL = "label"; // id suffixes private final static String PROJECTS_ID_SUFFIX = ID_SEPARATOR + "projects"; private final static String CONTENTPROVIDERS_ID_SUFFIX = ID_SEPARATOR + "contentproviders"; private final static String ZENODOCOMMUNITY_ID_SUFFIX = ID_SEPARATOR + "zenodocommunities"; private final static String ORGANIZATION_ID_SUFFIX = ID_SEPARATOR + "organizations"; // community summary private final static String CSUMMARY_DESCRIPTION = "description"; private final static String CSUMMARY_LOGOURL = "logourl"; private final static String CSUMMARY_STATUS = "status"; private final static String CSUMMARY_NAME = "name"; private final static String CSUMMARY_MANAGER = "manager"; private final static String CSUMMARY_ZENODOC = "zenodoCommunity"; // community profile private final static String CPROFILE_SUBJECT = "subject"; private final static String CPROFILE_CREATIONDATE = "creationdate"; private final static String CPROFILE_FOS = "fos"; private final static String CPROFILE_SDG = "sdg"; private final static String CPROFILE_ADVANCED_CONSTRAINT = "advancedConstraints"; private final static String CPROFILE_REMOVE_CONSTRAINT = "removeConstraints"; // community project private final static String CPROJECT_FUNDER = "funder"; private final static String CPROJECT_NUMBER = "CD_PROJECT_NUMBER"; private final static String CPROJECT_FULLNAME = "projectfullname"; private final static String CPROJECT_ACRONYM = "acronym"; // community content provider private final static String CCONTENTPROVIDER_NAME = "name"; private final static String CCONTENTPROVIDER_OFFICIALNAME = "officialname"; private final static String CCONTENTPROVIDER_ENABLED = "enabled"; private final static String CCONTENTPROVIDERENABLED_DEFAULT = "true"; private final static String CCONTENTPROVIDER_SELCRITERIA = "selcriteria"; // community zenodo community private final static String CZENODOCOMMUNITY_ID = "zenodoid"; // community organization private final static String CORGANIZATION_NAME = "name"; private final static String CORGANIZATION_LOGOURL = "logourl"; private final static String CORGANIZATION_WEBSITEURL = "websiteurl"; @Autowired private DbOrganizationRepository dbOrganizationRepository; @Autowired private CommunityService service; @Autowired private JdbcTemplate jdbcTemplate; private static final Log log = LogFactory.getLog(CommunityImporterService.class); public List importPropagationOrganizationsFromProfile(final String xml, final boolean simulation) throws Exception { final String json = DocumentHelper.parseText(xml) .selectSingleNode("//NODE[@name='setPropagationOrganizationCommunityMap']//PARAM[@name='parameterValue']") .getText(); final List list = new ObjectMapper() .readValue(json, new TypeReference>>() {}) .entrySet() .stream() .flatMap(e -> e.getValue() .stream() .map(community -> { if (e.getKey().contains("|")) { return new DbOrganization(community, StringUtils.substringAfter(e.getKey(), "|")); } else { return new DbOrganization(community, e.getKey()); } })) .collect(Collectors.toList()); if (!simulation) { list.forEach(o -> { try { dbOrganizationRepository.save(o); } catch (final Throwable e) { log.error("ERROR saving org: " + o); } }); } return list; } @Transactional public void importCommunity(final Context context) { try { final CommunityDetails community = asCommunityDetails(context); final List datasources = getCommunityInfo(context, CONTENTPROVIDERS_ID_SUFFIX, c -> asCommunityDataprovider(context.getId(), c)) .stream() .map(o -> { if (o.getOpenaireId() == null) { log.warn("Openaire ID is missing, organization: " + o.getOfficialname()); } else if (o.getOpenaireId().contains("|")) { o.setOpenaireId(StringUtils.substringAfter(o.getOpenaireId(), "|")); } return o; }) .filter(o -> o.getOpenaireId() != null) .collect(Collectors.toList()); final List projects = getCommunityInfo(context, PROJECTS_ID_SUFFIX, c -> asCommunityProject(context.getId(), c)) .stream() .map(p -> { if (p.getOpenaireId() == null) { if (p.getFunder().equalsIgnoreCase("EC")) { final String ns = findNamespaceForECProject(p.getGrantId()); if (ns != null) { p.setOpenaireId(ns + "::" + Hashing.md5(p.getGrantId())); } else { log.error("EC project not in the db: " + p.getGrantId()); } } else if (p.getFunder().equalsIgnoreCase("NSF")) { p.setOpenaireId("nsf_________::" + Hashing.md5(p.getGrantId())); } else if (p.getFunder().equalsIgnoreCase("NIH")) { p.setOpenaireId("nih_________::" + Hashing.md5(p.getGrantId())); } else { log.warn("Openaire ID is missing, funder: " + p.getFunder()); } } else if (p.getOpenaireId().contains("|")) { p.setOpenaireId(StringUtils.substringAfter(p.getOpenaireId(), "|")); } return p; }) .filter(p -> p.getOpenaireId() != null) .collect(Collectors.toList()); final List orgs = getCommunityInfo(context, ORGANIZATION_ID_SUFFIX, c -> asCommunityOrganization(context.getId(), c)); final List otherZenodoCommunities = getCommunityInfo(context, ZENODOCOMMUNITY_ID_SUFFIX, c -> asZenodoCommunity(c)); community.setOtherZenodoCommunities(otherZenodoCommunities); final List subs = context.getCategories() .entrySet() .stream() .filter(e -> !e.getKey().equals(context.getId() + CONTENTPROVIDERS_ID_SUFFIX)) .filter(e -> !e.getKey().equals(context.getId() + PROJECTS_ID_SUFFIX)) .filter(e -> !e.getKey().equals(context.getId() + ORGANIZATION_ID_SUFFIX)) .filter(e -> !e.getKey().equals(context.getId() + ZENODOCOMMUNITY_ID_SUFFIX)) .map(e -> e.getValue()) .map(cat -> asSubCommunities(context.getId(), null, cat.getLabel(), cat.getConcepts())) .flatMap(List::stream) .collect(Collectors.toList()); service.saveCommunity(community); service.addCommunityProjects(context.getId(), projects.toArray(new CommunityProject[projects.size()])); service.addCommunityContentProviders(context.getId(), datasources.toArray(new CommunityContentprovider[datasources.size()])); service.addCommunityOrganizations(context.getId(), orgs.toArray(new CommunityOrganization[orgs.size()])); service.addSubCommunities(subs.toArray(new SubCommunity[subs.size()])); } catch ( final Exception e) { throw new RuntimeException("Error importing community: " + context.getId(), e); } } private List getCommunityInfo(final Context context, final String idSuffix, final Function mapping) throws CommunityException { if (context != null) { final Map categories = context.getCategories(); final Category category = categories.get(context.getId() + idSuffix); if (category != null) { return category.getConcepts() .stream() .map(mapping) .collect(Collectors.toList()); } } return Lists.newArrayList(); } private static CommunityDetails asCommunityDetails(final Context c) { final CommunityDetails details = new CommunityDetails(); details.setId(c.getId()); details.setShortName(c.getLabel()); details.setLastUpdateDate(CommunityMappingUtils.asLocalDateTime(c.getLastUpdateDate())); details.setCreationDate(CommunityMappingUtils.asLocalDateTime(c.getCreationDate())); details.setQueryId(c.getId() + PIPE_SEPARATOR + c.getLabel()); details.setType(CommunityType.valueOf(c.getType())); details.setMembership(CommunityMembershipType.open); details.setClaim(CommunityClaimType.all); final Map> params = c.getParams(); if (params.containsKey(CSUMMARY_DESCRIPTION)) { details.setDescription(asCsv(params.get(CSUMMARY_DESCRIPTION))); } if (params.containsKey(CSUMMARY_LOGOURL)) { details.setLogoUrl(asCsv(params.get(CSUMMARY_LOGOURL))); } if (params.containsKey(CSUMMARY_STATUS)) { details.setStatus(CommunityStatus.valueOf(firstValue(params, CSUMMARY_STATUS))); } if (params.containsKey(CSUMMARY_NAME)) { details.setName(asCsv(params.get(CSUMMARY_NAME))); } else { details.setName(c.getLabel()); } if (params.containsKey(CSUMMARY_ZENODOC)) { details.setZenodoCommunity(asCsv(params.get(CSUMMARY_ZENODOC))); } if (params.containsKey(CPROFILE_SUBJECT)) { details.setSubjects(splitValues(asValues(params.get(CPROFILE_SUBJECT)), CSV_DELIMITER)); } if (params.containsKey(CPROFILE_FOS)) { details.setFos(splitValues(asValues(params.get(CPROFILE_FOS)), CSV_DELIMITER)); } if (params.containsKey(CPROFILE_SDG)) { details.setSdg(splitValues(asValues(params.get(CPROFILE_SDG)), CSV_DELIMITER)); } if (params.containsKey(CPROFILE_ADVANCED_CONSTRAINT)) { // In the map the string is the serialization of the json representing the selection criteria so it is a valid json details.setAdvancedConstraints(SelectionCriteria.fromJson(asCsv(params.get(CPROFILE_ADVANCED_CONSTRAINT)))); } if (params.containsKey(CPROFILE_REMOVE_CONSTRAINT)) { // In the map the string is the serialization of the json representing the selection criteria so it is a valid json details.setRemoveConstraints(SelectionCriteria.fromJson(asCsv(params.get(CPROFILE_REMOVE_CONSTRAINT)))); } if (params.containsKey(CPROFILE_CREATIONDATE)) { try { details.setCreationDate(CommunityMappingUtils.asLocalDateTime(asCsv(params.get(CPROFILE_CREATIONDATE)))); } catch (final Exception e) { log.debug("Exception on date format: " + e.getMessage()); } } return details; } private static CommunityProject asCommunityProject(final String communityId, final Concept c) { final Map> p = c.getParams(); final CommunityProject project = new CommunityProject(); project.setCommunityId(communityId); project.setOpenaireId(firstValue(p, OPENAIRE_ID)); project.setFunder(firstValue(p, CPROJECT_FUNDER)); project.setGrantId(firstValue(p, CPROJECT_NUMBER)); project.setName(firstValue(p, CPROJECT_FULLNAME)); project.setAcronym(firstValue(p, CPROJECT_ACRONYM)); return project; } private static CommunityContentprovider asCommunityDataprovider(final String communityId, final Concept c) { final Map> p = c.getParams(); final CommunityContentprovider d = new CommunityContentprovider(); d.setCommunityId(communityId); d.setOpenaireId(firstValue(p, OPENAIRE_ID)); d.setName(firstValue(p, CCONTENTPROVIDER_NAME)); d.setOfficialname(firstValue(p, CCONTENTPROVIDER_OFFICIALNAME)); d.setSelectioncriteria(SelectionCriteria.fromJson(firstValue(p, CCONTENTPROVIDER_SELCRITERIA))); return d; } private static CommunityOrganization asCommunityOrganization(final String id, final Concept c) { final Map> p = c.getParams(); final CommunityOrganization o = new CommunityOrganization(); o.setCommunityId(id); o.setName(firstValue(p, CORGANIZATION_NAME)); o.setLogo_url(getDecodedUrl(firstValue(p, CORGANIZATION_LOGOURL))); o.setWebsite_url(getDecodedUrl(firstValue(p, CORGANIZATION_WEBSITEURL))); return o; } private static String asZenodoCommunity(final Concept c) { return firstValue(c.getParams(), CZENODOCOMMUNITY_ID); } private static List asSubCommunities(final String communityId, final String parent, final String category, final List concepts) { final List list = new ArrayList<>(); for (final Concept c : concepts) { final SubCommunity sc = new SubCommunity(); sc.setSubCommunityId(c.getId()); sc.setCommunityId(communityId); sc.setParent(parent); sc.setCategory(category); sc.setLabel(c.getLabel()); sc.setParams(c.getParams()); sc.setClaim(c.isClaim()); list.add(sc); list.addAll(asSubCommunities(communityId, c.getId(), category, c.getConcepts())); } return list; } private String findNamespaceForECProject(final String code) { final List list = jdbcTemplate.queryForList("SELECT substr(id, 1, 12) from projects where code = ? and id like 'corda%'", String.class, code); return list.isEmpty() ? null : list.get(0); } private static String getDecodedUrl(final String encoded_url) { if (encoded_url == null) { return encoded_url; } return new String(Base64.getDecoder().decode(encoded_url)); } private static List splitValues(final Stream stream, final String separator) { return stream.map(s -> s.split(separator)) .map(Arrays::asList) .flatMap(List::stream) .filter(StringUtils::isNotBlank) .map(StringUtils::trim) .collect(Collectors.toList()); } private static String firstValue(final Map> p, final String paramName) { return asValues(p.get(paramName)).findFirst().orElse(null); } private static String asCsv(final List params) { return asValues(params) .collect(Collectors.joining(CSV_DELIMITER)); } private static Stream asValues(final List params) { return params == null ? Stream.empty() : params.stream() .map(Param::getValue) .map(StringUtils::trim) .distinct(); } protected DbOrganizationRepository getDbOrganizationRepository() { return dbOrganizationRepository; } protected void setDbOrganizationRepository(final DbOrganizationRepository dbOrganizationRepository) { this.dbOrganizationRepository = dbOrganizationRepository; } protected CommunityService getService() { return service; } protected void setService(final CommunityService service) { this.service = service; } protected JdbcTemplate getJdbcTemplate() { return jdbcTemplate; } protected void setJdbcTemplate(final JdbcTemplate jdbcTemplate) { this.jdbcTemplate = jdbcTemplate; } }