408 lines
17 KiB
Java
408 lines
17 KiB
Java
package eu.dnetlib.openaire.community.importer;
|
|
|
|
import java.nio.charset.StandardCharsets;
import java.time.LocalDate;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Base64;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.function.Function;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import javax.transaction.Transactional;

import org.apache.commons.lang3.BooleanUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dom4j.DocumentHelper;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.stereotype.Service;

import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.Lists;

import eu.dnetlib.miscutils.functional.hash.Hashing;
import eu.dnetlib.openaire.community.CommunityService;
import eu.dnetlib.openaire.community.model.DbOrganization;
import eu.dnetlib.openaire.community.repository.DbOrganizationRepository;
import eu.dnetlib.openaire.community.utils.CommunityMappingUtils;
import eu.dnetlib.openaire.exporter.exceptions.CommunityException;
import eu.dnetlib.openaire.exporter.model.community.CommunityClaimType;
import eu.dnetlib.openaire.exporter.model.community.CommunityContentprovider;
import eu.dnetlib.openaire.exporter.model.community.CommunityDetails;
import eu.dnetlib.openaire.exporter.model.community.CommunityMembershipType;
import eu.dnetlib.openaire.exporter.model.community.CommunityOrganization;
import eu.dnetlib.openaire.exporter.model.community.CommunityProject;
import eu.dnetlib.openaire.exporter.model.community.CommunityStatus;
import eu.dnetlib.openaire.exporter.model.community.CommunityType;
import eu.dnetlib.openaire.exporter.model.community.SubCommunity;
import eu.dnetlib.openaire.exporter.model.community.selectioncriteria.SelectionCriteria;
import eu.dnetlib.openaire.exporter.model.context.Category;
import eu.dnetlib.openaire.exporter.model.context.Concept;
import eu.dnetlib.openaire.exporter.model.context.Context;
import eu.dnetlib.openaire.exporter.model.context.Param;
|
|
|
|
@Service
|
|
@ConditionalOnProperty(value = "openaire.exporter.enable.community.import", havingValue = "true")
|
|
public class CommunityImporterService {
|
|
|
|
// common
|
|
public final static String OPENAIRE_ID = "openaireId";
|
|
public final static String PIPE_SEPARATOR = "||";
|
|
public final static String ID_SEPARATOR = "::";
|
|
public final static String CSV_DELIMITER = ",";
|
|
public final static String CLABEL = "label";
|
|
|
|
// id suffixes
|
|
public final static String PROJECTS_ID_SUFFIX = ID_SEPARATOR + "projects";
|
|
public final static String CONTENTPROVIDERS_ID_SUFFIX = ID_SEPARATOR + "contentproviders";
|
|
public final static String ZENODOCOMMUNITY_ID_SUFFIX = ID_SEPARATOR + "zenodocommunities";
|
|
public final static String ORGANIZATION_ID_SUFFIX = ID_SEPARATOR + "organizations";
|
|
|
|
// community summary
|
|
public final static String CSUMMARY_DESCRIPTION = "description";
|
|
public final static String CSUMMARY_LOGOURL = "logourl";
|
|
public final static String CSUMMARY_STATUS = "status";
|
|
public final static String CSUMMARY_NAME = "name";
|
|
public final static String CSUMMARY_MANAGER = "manager";
|
|
public final static String CSUMMARY_ZENODOC = "zenodoCommunity";
|
|
|
|
// community profile
|
|
public final static String CPROFILE_SUBJECT = "subject";
|
|
public final static String CPROFILE_CREATIONDATE = "creationdate";
|
|
public final static String CPROFILE_FOS = "fos";
|
|
public final static String CPROFILE_SDG = "sdg";
|
|
public final static String CPROFILE_ADVANCED_CONSTRAINT = "advancedConstraints";
|
|
public final static String CPROFILE_REMOVE_CONSTRAINT = "removeConstraints";
|
|
public final static String CPROFILE_SUGGESTED_ACKNOWLEDGEMENT = "suggestedAcknowledgement";
|
|
|
|
// community project
|
|
public final static String CPROJECT_FUNDER = "funder";
|
|
public final static String CPROJECT_NUMBER = "CD_PROJECT_NUMBER";
|
|
public final static String CPROJECT_FULLNAME = "projectfullname";
|
|
public final static String CPROJECT_ACRONYM = "acronym";
|
|
|
|
// community content provider
|
|
public final static String CCONTENTPROVIDER_NAME = "name";
|
|
public final static String CCONTENTPROVIDER_OFFICIALNAME = "officialname";
|
|
public final static String CCONTENTPROVIDER_ENABLED = "enabled";
|
|
public final static String CCONTENTPROVIDERENABLED_DEFAULT = "true";
|
|
public final static String CCONTENTPROVIDER_SELCRITERIA = "selcriteria";
|
|
|
|
// community zenodo community
|
|
public final static String CZENODOCOMMUNITY_ID = "zenodoid";
|
|
|
|
// community organization
|
|
public final static String CORGANIZATION_NAME = "name";
|
|
public final static String CORGANIZATION_LOGOURL = "logourl";
|
|
public final static String CORGANIZATION_WEBSITEURL = "websiteurl";
|
|
|
|
@Autowired
|
|
private DbOrganizationRepository dbOrganizationRepository;
|
|
|
|
@Autowired
|
|
private CommunityService service;
|
|
|
|
@Autowired
|
|
private JdbcTemplate jdbcTemplate;
|
|
|
|
private static final Log log = LogFactory.getLog(CommunityImporterService.class);
|
|
|
|
public List<DbOrganization> importPropagationOrganizationsFromProfile(final String xml, final boolean simulation) throws Exception {
|
|
final String json = DocumentHelper.parseText(xml)
|
|
.selectSingleNode("//NODE[@name='setPropagationOrganizationCommunityMap']//PARAM[@name='parameterValue']")
|
|
.getText();
|
|
|
|
final List<DbOrganization> list = new ObjectMapper()
|
|
.readValue(json, new TypeReference<Map<String, List<String>>>() {})
|
|
.entrySet()
|
|
.stream()
|
|
.flatMap(e -> e.getValue()
|
|
.stream()
|
|
.map(community -> {
|
|
if (e.getKey().contains("|")) { return new DbOrganization(community, StringUtils.substringAfter(e.getKey(), "|")); }
|
|
return new DbOrganization(community, e.getKey());
|
|
}))
|
|
.collect(Collectors.toList());
|
|
|
|
if (!simulation) {
|
|
list.forEach(o -> {
|
|
try {
|
|
dbOrganizationRepository.save(o);
|
|
} catch (final Throwable e) {
|
|
log.error("ERROR saving org: " + o);
|
|
}
|
|
});
|
|
}
|
|
|
|
return list;
|
|
}
|
|
|
|
@Transactional
|
|
public void importCommunity(final Context context) {
|
|
try {
|
|
|
|
final CommunityDetails community = asCommunityDetails(context);
|
|
|
|
final List<CommunityContentprovider> datasources =
|
|
getCommunityInfo(context, CONTENTPROVIDERS_ID_SUFFIX, c -> asCommunityDataprovider(context.getId(), c))
|
|
.stream()
|
|
.map(o -> {
|
|
if (o.getOpenaireId() == null) {
|
|
log.warn("Openaire ID is missing, organization: " + o.getOfficialname());
|
|
} else if (o.getOpenaireId().contains("|")) {
|
|
o.setOpenaireId(StringUtils.substringAfter(o.getOpenaireId(), "|"));
|
|
}
|
|
return o;
|
|
})
|
|
.filter(o -> o.getOpenaireId() != null)
|
|
.collect(Collectors.toList());
|
|
|
|
final List<CommunityProject> projects =
|
|
getCommunityInfo(context, PROJECTS_ID_SUFFIX, c -> asCommunityProject(context.getId(), c))
|
|
.stream()
|
|
.map(p -> {
|
|
if (p.getOpenaireId() == null) {
|
|
if ("EC".equalsIgnoreCase(p.getFunder())) {
|
|
final String ns = findNamespaceForECProject(p.getGrantId());
|
|
if (ns != null) {
|
|
p.setOpenaireId(ns + "::" + Hashing.md5(p.getGrantId()));
|
|
} else {
|
|
log.warn("EC project not in the db: " + p.getGrantId());
|
|
}
|
|
} else if ("NSF".equalsIgnoreCase(p.getFunder())) {
|
|
p.setOpenaireId("nsf_________::" + Hashing.md5(p.getGrantId()));
|
|
} else if ("NIH".equalsIgnoreCase(p.getFunder())) {
|
|
p.setOpenaireId("nih_________::" + Hashing.md5(p.getGrantId()));
|
|
} else {
|
|
log.warn("Openaire ID is missing, funder: " + p.getFunder());
|
|
}
|
|
} else if (p.getOpenaireId().contains("|")) {
|
|
p.setOpenaireId(StringUtils.substringAfter(p.getOpenaireId(), "|"));
|
|
}
|
|
return p;
|
|
})
|
|
.filter(p -> p.getOpenaireId() != null)
|
|
.collect(Collectors.toList());
|
|
|
|
final List<CommunityOrganization> orgs =
|
|
getCommunityInfo(context, ORGANIZATION_ID_SUFFIX, c -> asCommunityOrganization(context.getId(), c));
|
|
|
|
final List<String> otherZenodoCommunities =
|
|
getCommunityInfo(context, ZENODOCOMMUNITY_ID_SUFFIX, CommunityImporterService::asZenodoCommunity);
|
|
|
|
community.setOtherZenodoCommunities(otherZenodoCommunities);
|
|
|
|
final List<SubCommunity> subs = context.getCategories()
|
|
.entrySet()
|
|
.stream()
|
|
.filter(e -> !(context.getId() + CONTENTPROVIDERS_ID_SUFFIX).equals(e.getKey()))
|
|
.filter(e -> !(context.getId() + PROJECTS_ID_SUFFIX).equals(e.getKey()))
|
|
.filter(e -> !(context.getId() + ORGANIZATION_ID_SUFFIX).equals(e.getKey()))
|
|
.filter(e -> !(context.getId() + ZENODOCOMMUNITY_ID_SUFFIX).equals(e.getKey()))
|
|
.map(Entry::getValue)
|
|
.map(cat -> asSubCommunities(context.getId(), null, cat.getLabel(), cat.getConcepts()))
|
|
.flatMap(List::stream)
|
|
.collect(Collectors.toList());
|
|
|
|
service.saveCommunity(community);
|
|
service.addCommunityProjects(context.getId(), projects.toArray(new CommunityProject[projects.size()]));
|
|
service.addCommunityDatasources(context.getId(), datasources.toArray(new CommunityContentprovider[datasources.size()]));
|
|
service.addCommunityOrganizations(context.getId(), orgs.toArray(new CommunityOrganization[orgs.size()]));
|
|
service.addSubCommunities(context.getId(), subs.toArray(new SubCommunity[subs.size()]));
|
|
} catch (
|
|
|
|
final Exception e) {
|
|
throw new RuntimeException("Error importing community: " + context.getId(), e);
|
|
}
|
|
}
|
|
|
|
private <R> List<R> getCommunityInfo(final Context context, final String idSuffix, final Function<Concept, R> mapping)
|
|
throws CommunityException {
|
|
if (context != null) {
|
|
final Map<String, Category> categories = context.getCategories();
|
|
final Category category = categories.get(context.getId() + idSuffix);
|
|
if (category != null) { return category.getConcepts()
|
|
.stream()
|
|
.map(mapping)
|
|
.collect(Collectors.toList()); }
|
|
}
|
|
return Lists.newArrayList();
|
|
}
|
|
|
|
private static CommunityDetails asCommunityDetails(final Context c) {
|
|
|
|
final CommunityDetails details = new CommunityDetails();
|
|
|
|
details.setId(c.getId());
|
|
details.setShortName(c.getLabel());
|
|
details.setLastUpdateDate(CommunityMappingUtils.asLocalDateTime(c.getLastUpdateDate()));
|
|
details.setCreationDate(CommunityMappingUtils.asLocalDateTime(c.getCreationDate()));
|
|
details.setQueryId(c.getId() + PIPE_SEPARATOR + c.getLabel());
|
|
details.setType(CommunityType.valueOf(c.getType()));
|
|
details.setMembership(CommunityMembershipType.open);
|
|
details.setClaim(CommunityClaimType.all);
|
|
details.setDescription(asCsv(CSUMMARY_DESCRIPTION, c.getParams()));
|
|
details.setLogoUrl(asCsv(CSUMMARY_LOGOURL, c.getParams()));
|
|
|
|
final String status = firstValue(CSUMMARY_STATUS, c.getParams());
|
|
if (StringUtils.isNotBlank(status)) {
|
|
details.setStatus(CommunityStatus.valueOf(status));
|
|
} else {
|
|
details.setStatus(CommunityStatus.hidden);
|
|
}
|
|
|
|
details.setName(StringUtils.firstNonBlank(asCsv(CSUMMARY_NAME, c.getParams()), c.getLabel()));
|
|
details.setZenodoCommunity(asCsv(CSUMMARY_ZENODOC, c.getParams()));
|
|
details.setSubjects(splitValues(asValues(CPROFILE_SUBJECT, c.getParams()), CSV_DELIMITER));
|
|
details.setFos(splitValues(asValues(CPROFILE_FOS, c.getParams()), CSV_DELIMITER));
|
|
details.setSdg(splitValues(asValues(CPROFILE_SDG, c.getParams()), CSV_DELIMITER));
|
|
// In the map the string is the serialization of the json representing the selection criteria so it is a valid json
|
|
details.setAdvancedConstraints(SelectionCriteria.fromJson(asCsv(CPROFILE_ADVANCED_CONSTRAINT, c.getParams())));
|
|
// In the map the string is the serialization of the json representing the selection criteria so it is a valid json
|
|
details.setRemoveConstraints(SelectionCriteria.fromJson(asCsv(CPROFILE_REMOVE_CONSTRAINT, c.getParams())));
|
|
details.setSuggestedAcknowledgements(splitValues(asValues(CPROFILE_SUGGESTED_ACKNOWLEDGEMENT, c.getParams()), CSV_DELIMITER));
|
|
details.setPlan(null);
|
|
try {
|
|
details.setCreationDate(CommunityMappingUtils.asLocalDateTime(asCsv(CPROFILE_CREATIONDATE, c.getParams())));
|
|
} catch (final Exception e) {
|
|
log.debug("Exception on date format: " + e.getMessage());
|
|
}
|
|
|
|
return details;
|
|
}
|
|
|
|
private static CommunityProject asCommunityProject(final String communityId, final Concept c) {
|
|
final List<Param> p = c.getParams();
|
|
final CommunityProject project = new CommunityProject();
|
|
project.setCommunityId(communityId);
|
|
project.setOpenaireId(firstValue(OPENAIRE_ID, p));
|
|
project.setFunder(firstValue(CPROJECT_FUNDER, p));
|
|
project.setGrantId(firstValue(CPROJECT_NUMBER, p));
|
|
project.setName(firstValue(CPROJECT_FULLNAME, p));
|
|
project.setAcronym(firstValue(CPROJECT_ACRONYM, p));
|
|
project.setAvailableSince(LocalDate.of(2017, 2, 25)); // Birillo Birth Date
|
|
return project;
|
|
}
|
|
|
|
private static CommunityContentprovider asCommunityDataprovider(final String communityId, final Concept c) {
|
|
final List<Param> p = c.getParams();
|
|
final CommunityContentprovider d = new CommunityContentprovider();
|
|
d.setCommunityId(communityId);
|
|
d.setOpenaireId(firstValue(OPENAIRE_ID, p));
|
|
d.setName(firstValue(CCONTENTPROVIDER_NAME, p));
|
|
d.setOfficialname(firstValue(CCONTENTPROVIDER_OFFICIALNAME, p));
|
|
d.setEnabled(BooleanUtils.toBoolean(firstValue(CCONTENTPROVIDER_ENABLED, p)));
|
|
d.setSelectioncriteria(SelectionCriteria.fromJson(firstValue(CCONTENTPROVIDER_SELCRITERIA, p)));
|
|
d.setDeposit(false);
|
|
d.setMessage(null);
|
|
return d;
|
|
}
|
|
|
|
private static CommunityOrganization asCommunityOrganization(final String id, final Concept c) {
|
|
final List<Param> p = c.getParams();
|
|
final CommunityOrganization o = new CommunityOrganization();
|
|
o.setCommunityId(id);
|
|
o.setName(firstValue(CORGANIZATION_NAME, p));
|
|
o.setLogo_url(getDecodedUrl(firstValue(CORGANIZATION_LOGOURL, p)));
|
|
o.setWebsite_url(getDecodedUrl(firstValue(CORGANIZATION_WEBSITEURL, p)));
|
|
return o;
|
|
}
|
|
|
|
private static String asZenodoCommunity(final Concept c) {
|
|
return firstValue(CZENODOCOMMUNITY_ID, c.getParams());
|
|
}
|
|
|
|
private static List<SubCommunity> asSubCommunities(final String communityId, final String parent, final String category, final List<Concept> concepts) {
|
|
final List<SubCommunity> list = new ArrayList<>();
|
|
for (final Concept c : concepts) {
|
|
final SubCommunity sc = new SubCommunity();
|
|
sc.setSubCommunityId(c.getId());
|
|
sc.setCommunityId(communityId);
|
|
sc.setParent(parent);
|
|
sc.setCategory(category);
|
|
sc.setLabel(c.getLabel());
|
|
sc.setParams(c.getParams());
|
|
sc.setClaim(c.isClaim());
|
|
sc.setBrowsable(false);
|
|
list.add(sc);
|
|
list.addAll(asSubCommunities(communityId, c.getId(), category, c.getConcepts()));
|
|
}
|
|
return list;
|
|
}
|
|
|
|
private String findNamespaceForECProject(final String code) {
|
|
final List<String> list =
|
|
jdbcTemplate.queryForList("SELECT substr(id, 1, 12) from projects where code = ? and id like 'corda%'", String.class, code);
|
|
return list.isEmpty() ? null : list.get(0);
|
|
}
|
|
|
|
private static String getDecodedUrl(final String encoded_url) {
|
|
if (encoded_url == null || encoded_url.startsWith("http")) { return encoded_url; }
|
|
try {
|
|
return new String(Base64.getDecoder().decode(encoded_url));
|
|
} catch (final Exception e) {
|
|
log.warn("Invalid base64: " + encoded_url);
|
|
return encoded_url;
|
|
}
|
|
}
|
|
|
|
private static List<String> splitValues(final Stream<String> stream, final String separator) {
|
|
return stream.map(s -> s.split(separator))
|
|
.map(Arrays::asList)
|
|
.flatMap(List::stream)
|
|
.filter(StringUtils::isNotBlank)
|
|
.map(StringUtils::trim)
|
|
.collect(Collectors.toList());
|
|
}
|
|
|
|
private static String firstValue(final String name, final List<Param> params) {
|
|
return asValues(name, params).findFirst().orElse(null);
|
|
}
|
|
|
|
private static String asCsv(final String name, final List<Param> params) {
|
|
return asValues(name, params).collect(Collectors.joining(CSV_DELIMITER));
|
|
}
|
|
|
|
private static Stream<String> asValues(final String name, final List<Param> params) {
|
|
return params == null ? Stream.empty()
|
|
: params.stream()
|
|
.filter(p -> p != null)
|
|
.filter(p -> StringUtils.isNotBlank(p.getName()))
|
|
.filter(p -> p.getName().trim().equals(name.trim()))
|
|
.map(Param::getValue)
|
|
.map(StringUtils::trim)
|
|
.distinct();
|
|
}
|
|
|
|
protected DbOrganizationRepository getDbOrganizationRepository() {
|
|
return dbOrganizationRepository;
|
|
}
|
|
|
|
protected void setDbOrganizationRepository(final DbOrganizationRepository dbOrganizationRepository) {
|
|
this.dbOrganizationRepository = dbOrganizationRepository;
|
|
}
|
|
|
|
protected CommunityService getService() {
|
|
return service;
|
|
}
|
|
|
|
protected void setService(final CommunityService service) {
|
|
this.service = service;
|
|
}
|
|
|
|
protected JdbcTemplate getJdbcTemplate() {
|
|
return jdbcTemplate;
|
|
}
|
|
|
|
protected void setJdbcTemplate(final JdbcTemplate jdbcTemplate) {
|
|
this.jdbcTemplate = jdbcTemplate;
|
|
}
|
|
}
|