package eu.dnetlib.dhp.broker.oa.util; import java.util.ArrayList; import java.util.List; import java.util.Objects; import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.DocumentHelper; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.base.Function; import eu.dnetlib.broker.objects.OaBrokerAuthor; import eu.dnetlib.broker.objects.OaBrokerExternalReference; import eu.dnetlib.broker.objects.OaBrokerInstance; import eu.dnetlib.broker.objects.OaBrokerJournal; import eu.dnetlib.broker.objects.OaBrokerMainEntity; import eu.dnetlib.broker.objects.OaBrokerProject; import eu.dnetlib.broker.objects.OaBrokerRelatedDataset; import eu.dnetlib.broker.objects.OaBrokerRelatedDatasource; import eu.dnetlib.broker.objects.OaBrokerRelatedPublication; import eu.dnetlib.broker.objects.OaBrokerRelatedSoftware; import eu.dnetlib.broker.objects.OaBrokerTypedValue; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Author; import eu.dnetlib.dhp.schema.oaf.Dataset; import eu.dnetlib.dhp.schema.oaf.Datasource; import eu.dnetlib.dhp.schema.oaf.ExternalReference; import eu.dnetlib.dhp.schema.oaf.Field; import eu.dnetlib.dhp.schema.oaf.Instance; import eu.dnetlib.dhp.schema.oaf.Journal; import eu.dnetlib.dhp.schema.oaf.KeyValue; import eu.dnetlib.dhp.schema.oaf.Project; import eu.dnetlib.dhp.schema.oaf.Publication; import eu.dnetlib.dhp.schema.oaf.Qualifier; import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.Software; import eu.dnetlib.dhp.schema.oaf.StructuredProperty; public class ConversionUtils { private static final Logger log = LoggerFactory.getLogger(ConversionUtils.class); public static List oafInstanceToBrokerInstances(final Instance i) { if (i == null) { return new ArrayList<>(); } return mappedList(i.getUrl(), url -> { final OaBrokerInstance res = new OaBrokerInstance(); res.setUrl(url); res.setInstancetype(classId(i.getInstancetype())); res.setLicense(BrokerConstants.OPEN_ACCESS); res.setHostedby(kvValue(i.getHostedby())); return res; }); } public static OaBrokerTypedValue oafPidToBrokerPid(final StructuredProperty sp) { return oafStructPropToBrokerTypedValue(sp); } public static OaBrokerTypedValue oafStructPropToBrokerTypedValue(final StructuredProperty sp) { return sp != null ? new OaBrokerTypedValue(classId(sp.getQualifier()), sp.getValue()) : null; } public static final OaBrokerRelatedDataset oafDatasetToBrokerDataset(final Dataset d) { if (d == null) { return null; } final OaBrokerRelatedDataset res = new OaBrokerRelatedDataset(); res.setOpenaireId(cleanOpenaireId(d.getId())); res.setOriginalId(first(d.getOriginalId())); res.setTitle(structPropValue(d.getTitle())); res.setPids(mappedList(d.getPid(), ConversionUtils::oafPidToBrokerPid)); res.setInstances(flatMappedList(d.getInstance(), ConversionUtils::oafInstanceToBrokerInstances)); res.setCollectedFrom(mappedFirst(d.getCollectedfrom(), KeyValue::getValue)); return res; } public static OaBrokerRelatedPublication oafPublicationToBrokerPublication(final Publication p) { if (p == null) { return null; } final OaBrokerRelatedPublication res = new OaBrokerRelatedPublication(); res.setOpenaireId(cleanOpenaireId(p.getId())); res.setOriginalId(first(p.getOriginalId())); res.setTitle(structPropValue(p.getTitle())); res.setPids(mappedList(p.getPid(), ConversionUtils::oafPidToBrokerPid)); res.setInstances(flatMappedList(p.getInstance(), ConversionUtils::oafInstanceToBrokerInstances)); res.setCollectedFrom(mappedFirst(p.getCollectedfrom(), KeyValue::getValue)); return res; } public static final OaBrokerMainEntity oafResultToBrokerResult(final Result result) { if (result == null) { return null; } final OaBrokerMainEntity res = new OaBrokerMainEntity(); res.setOpenaireId(cleanOpenaireId(result.getId())); res.setOriginalId(first(result.getOriginalId())); res.setTypology(classId(result.getResulttype())); res.setTitles(structPropList(result.getTitle())); res.setAbstracts(fieldList(result.getDescription())); res.setLanguage(classId(result.getLanguage())); res.setSubjects(structPropTypedList(result.getSubject())); res.setCreators(mappedList(result.getAuthor(), ConversionUtils::oafAuthorToBrokerAuthor)); res.setPublicationdate(fieldValue(result.getDateofacceptance())); res.setPublisher(fieldValue(result.getPublisher())); res.setEmbargoenddate(fieldValue(result.getEmbargoenddate())); res.setContributor(fieldList(result.getContributor())); res .setJournal( result instanceof Publication ? oafJournalToBrokerJournal(((Publication) result).getJournal()) : null); res.setPids(mappedList(result.getPid(), ConversionUtils::oafPidToBrokerPid)); res.setInstances(flatMappedList(result.getInstance(), ConversionUtils::oafInstanceToBrokerInstances)); res .setExternalReferences(mappedList(result.getExternalReference(), ConversionUtils::oafExtRefToBrokerExtRef)); return res; } public static String cleanOpenaireId(final String id) { return id.contains("|") ? StringUtils.substringAfter(id, "|") : id; } private static OaBrokerAuthor oafAuthorToBrokerAuthor(final Author author) { if (author == null) { return null; } final String pids = author.getPid() != null ? author .getPid() .stream() .filter(pid -> pid != null) .filter(pid -> pid.getQualifier() != null) .filter(pid -> pid.getQualifier().getClassid() != null) .filter(pid -> pid.getQualifier().getClassid().equalsIgnoreCase(ModelConstants.ORCID)) .map(pid -> pid.getValue()) .map(pid -> cleanOrcid(pid)) .filter(StringUtils::isNotBlank) .findFirst() .orElse(null) : null; return new OaBrokerAuthor(author.getFullname(), pids); } private static String cleanOrcid(final String s) { final String match = "//orcid.org/"; return s.contains(match) ? StringUtils.substringAfter(s, match) : s; } private static OaBrokerJournal oafJournalToBrokerJournal(final Journal journal) { if (journal == null) { return null; } final OaBrokerJournal res = new OaBrokerJournal(); res.setName(journal.getName()); res.setIssn(journal.getIssnPrinted()); res.setEissn(journal.getIssnOnline()); res.setLissn(journal.getIssnLinking()); return res; } private static OaBrokerExternalReference oafExtRefToBrokerExtRef(final ExternalReference ref) { if (ref == null) { return null; } final OaBrokerExternalReference res = new OaBrokerExternalReference(); res.setRefidentifier(ref.getRefidentifier()); res.setSitename(ref.getSitename()); res.setType(classId(ref.getQualifier())); res.setUrl(ref.getUrl()); return res; } public static final OaBrokerProject oafProjectToBrokerProject(final Project p) { if (p == null) { return null; } final OaBrokerProject res = new OaBrokerProject(); res.setOpenaireId(cleanOpenaireId(p.getId())); res.setTitle(fieldValue(p.getTitle())); res.setAcronym(fieldValue(p.getAcronym())); res.setCode(fieldValue(p.getCode())); final String ftree = fieldValue(p.getFundingtree()); if (StringUtils.isNotBlank(ftree)) { try { final Document fdoc = DocumentHelper.parseText(ftree); res.setFunder(fdoc.valueOf("/fundingtree/funder/shortname")); res.setJurisdiction(fdoc.valueOf("/fundingtree/funder/jurisdiction")); res.setFundingProgram(fdoc.valueOf("//funding_level_0/name")); } catch (final DocumentException e) { log.error("Error in record " + p.getId() + ": invalid fundingtree: " + ftree); } } return res; } public static final OaBrokerRelatedSoftware oafSoftwareToBrokerSoftware(final Software sw) { if (sw == null) { return null; } final OaBrokerRelatedSoftware res = new OaBrokerRelatedSoftware(); res.setOpenaireId(cleanOpenaireId(sw.getId())); res.setName(structPropValue(sw.getTitle())); res.setDescription(fieldValue(sw.getDescription())); res.setRepository(fieldValue(sw.getCodeRepositoryUrl())); res.setLandingPage(fieldValue(sw.getDocumentationUrl())); return res; } public static final OaBrokerRelatedDatasource oafDatasourceToBrokerDatasource(final Datasource ds) { if (ds == null) { return null; } final OaBrokerRelatedDatasource res = new OaBrokerRelatedDatasource(); res.setName(StringUtils.defaultIfBlank(fieldValue(ds.getOfficialname()), fieldValue(ds.getEnglishname()))); res.setOpenaireId(cleanOpenaireId(ds.getId())); res.setType(classId(ds.getDatasourcetype())); return res; } private static String first(final List list) { return list != null && list.size() > 0 ? list.get(0) : null; } private static String kvValue(final KeyValue kv) { return kv != null ? kv.getValue() : null; } private static String fieldValue(final Field f) { return f != null ? f.getValue() : null; } private static String fieldValue(final List> fl) { return fl != null ? fl.stream().map(Field::getValue).filter(StringUtils::isNotBlank).findFirst().orElse(null) : null; } private static String classId(final Qualifier q) { return q != null ? q.getClassid() : null; } private static String structPropValue(final List props) { return props != null ? props.stream().map(StructuredProperty::getValue).filter(StringUtils::isNotBlank).findFirst().orElse(null) : null; } private static List fieldList(final List> fl) { return fl != null ? fl .stream() .map(Field::getValue) .map(s -> StringUtils.abbreviate(s, BrokerConstants.MAX_STRING_SIZE)) .filter(StringUtils::isNotBlank) .limit(BrokerConstants.MAX_LIST_SIZE) .collect(Collectors.toList()) : new ArrayList<>(); } private static List structPropList(final List props) { return props != null ? props .stream() .map(StructuredProperty::getValue) .filter(StringUtils::isNotBlank) .limit(BrokerConstants.MAX_LIST_SIZE) .collect(Collectors.toList()) : new ArrayList<>(); } private static List structPropTypedList(final List list) { if (list == null) { return new ArrayList<>(); } return list .stream() .map(ConversionUtils::oafStructPropToBrokerTypedValue) .filter(Objects::nonNull) .collect(Collectors.toList()); } private static List mappedList(final List list, final Function func) { if (list == null) { return new ArrayList<>(); } return list .stream() .map(func::apply) .filter(Objects::nonNull) .limit(BrokerConstants.MAX_LIST_SIZE) .collect(Collectors.toList()); } private static List flatMappedList(final List list, final Function> func) { if (list == null) { return new ArrayList<>(); } return list .stream() .map(func::apply) .flatMap(List::stream) .filter(Objects::nonNull) .limit(BrokerConstants.MAX_LIST_SIZE) .collect(Collectors.toList()); } private static T mappedFirst(final List list, final Function func) { if (list == null) { return null; } return list .stream() .map(func::apply) .filter(Objects::nonNull) .findFirst() .orElse(null); } }