package eu.dnetlib.dhp.oa.graph.raw;

import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.createOpenaireId;
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.field;
import static eu.dnetlib.dhp.schema.oaf.OafMapperUtils.structuredProperty;

import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.stream.Collectors;

import org.apache.commons.lang3.StringUtils;
import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.Node;

import com.google.common.collect.Lists;

import eu.dnetlib.dhp.common.PacePerson;
import eu.dnetlib.dhp.oa.graph.raw.common.VocabularyGroup;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;

/**
 * {@link AbstractMdRecordToOafMapper} implementation that fills the model from the
 * {@code dc:}, {@code dr:} and {@code oaf:} prefixed elements of the record, selected via XPath.
 *
 * <p>Methods returning {@code null} or an empty list and marked "NOT PRESENT IN OAF" are
 * hooks required by the superclass for which this mapper extracts nothing.
 */
public class OafToOafMapper extends AbstractMdRecordToOafMapper {

    /**
     * Forwards both arguments unchanged to the superclass constructor.
     *
     * @param vocs      vocabularies handed to the superclass
     * @param invisible flag handed to the superclass
     */
    public OafToOafMapper(final VocabularyGroup vocs, final boolean invisible) {
        super(vocs, invisible);
    }

    /**
     * Builds one {@link Author} per {@code dc:creator} element, in document order.
     *
     * <p>The rank is the 1-based position of the element. Name and surname are set only when
     * {@link PacePerson} reports the parsed full name as accurate. A pid is attached when the
     * {@code nameIdentifier} attribute is not blank and the normalised
     * {@code nameIdentifierScheme} starts with {@code ORCID} or {@code MAGID}.
     *
     * @param doc  the record
     * @param info data info attached to every created pid
     * @return the authors, possibly empty, never {@code null}
     */
    @Override
    protected List<Author> prepareAuthors(final Document doc, final DataInfo info) {
        final List<Author> res = new ArrayList<>();
        int pos = 1;
        for (final Object o : doc.selectNodes("//dc:creator")) {
            final Element e = (Element) o;
            final Author author = new Author();
            author.setFullname(e.getText());
            author.setRank(pos++);

            final PacePerson p = new PacePerson(e.getText(), false);
            if (p.isAccurate()) {
                author.setName(p.getNormalisedFirstName());
                author.setSurname(p.getNormalisedSurname());
            }

            final String pid = e.valueOf("./@nameIdentifier");
            // Locale.ROOT keeps the upper-casing independent of the JVM default locale
            // (e.g. a Turkish locale would map 'i' to a dotted capital and break the
            // startsWith checks below). Blanks and underscores are dropped so that
            // variants such as "orcid_pending" or "mag id" still match.
            final String type = e
                .valueOf("./@nameIdentifierScheme")
                .trim()
                .toUpperCase(Locale.ROOT)
                .replace(" ", "")
                .replace("_", "");

            // always set a list, so authors without identifiers expose an empty pid list
            author.setPid(new ArrayList<>());

            if (StringUtils.isNotBlank(pid)) {
                if (type.startsWith("ORCID")) {
                    // literal replace: the URL prefixes are plain text, not regular expressions
                    final String cleanedId = pid
                        .replace("http://orcid.org/", "")
                        .replace("https://orcid.org/", "");
                    author.getPid().add(structuredProperty(cleanedId, ORCID_PID_TYPE, info));
                } else if (type.startsWith("MAGID")) {
                    author.getPid().add(structuredProperty(pid, MAG_PID_TYPE, info));
                }
            }

            res.add(author);
        }
        return res;
    }

    /** Resolves {@code //dc:language} as a qualifier of the {@code DNET_LANGUAGES} vocabulary. */
    @Override
    protected Qualifier prepareLanguages(final Document doc) {
        return prepareQualifier(doc, "//dc:language", DNET_LANGUAGES);
    }

    /** Collects the {@code //dc:subject} elements as structured properties. */
    @Override
    protected List<StructuredProperty> prepareSubjects(final Document doc, final DataInfo info) {
        return prepareListStructProps(doc, "//dc:subject", info);
    }

    /** Collects the {@code //dc:title} elements, all qualified with {@code MAIN_TITLE_QUALIFIER}. */
    @Override
    protected List<StructuredProperty> prepareTitles(final Document doc, final DataInfo info) {
        return prepareListStructProps(doc, "//dc:title", MAIN_TITLE_QUALIFIER, info);
    }

    /**
     * Collects the {@code //dc:description} elements, truncating each value to at most
     * {@code ModelHardLimits.MAX_ABSTRACT_LENGTH} characters.
     */
    @Override
    protected List<Field<String>> prepareDescriptions(final Document doc, final DataInfo info) {
        return prepareListFields(doc, "//dc:description", info)
            .stream()
            .map(d -> {
                d.setValue(StringUtils.left(d.getValue(), ModelHardLimits.MAX_ABSTRACT_LENGTH));
                return d;
            })
            .collect(Collectors.toList());
    }

    /** Reads {@code //dc:publisher} as a single field. */
    @Override
    protected Field<String> preparePublisher(final Document doc, final DataInfo info) {
        return prepareField(doc, "//dc:publisher", info);
    }

    /** Collects the {@code //dc:format} elements. */
    @Override
    protected List<Field<String>> prepareFormats(final Document doc, final DataInfo info) {
        return prepareListFields(doc, "//dc:format", info);
    }

    /** Collects the {@code //dc:contributor} elements. */
    @Override
    protected List<Field<String>> prepareContributors(final Document doc, final DataInfo info) {
        return prepareListFields(doc, "//dc:contributor", info);
    }

    /** Collects the {@code //dc:coverage} elements. */
    @Override
    protected List<Field<String>> prepareCoverages(final Document doc, final DataInfo info) {
        return prepareListFields(doc, "//dc:coverage", info);
    }

    /**
     * Builds the single {@link Instance} described by the record.
     *
     * <p>The instance URLs are the trimmed, distinct values of the non-blank
     * {@code //dc:identifier} elements that start with {@code http}.
     *
     * @param doc           the record
     * @param info          data info attached to the created fields
     * @param collectedfrom stored as-is on the instance
     * @param hostedby      stored as-is on the instance
     * @return a mutable list holding exactly one instance
     */
    @Override
    protected List<Instance> prepareInstances(
        final Document doc,
        final DataInfo info,
        final KeyValue collectedfrom,
        final KeyValue hostedby) {

        final Instance instance = new Instance();
        instance.setInstancetype(prepareQualifier(doc, "//dr:CobjCategory", DNET_PUBLICATION_RESOURCE));
        instance.setCollectedfrom(collectedfrom);
        instance.setHostedby(hostedby);
        instance.setDateofacceptance(field(doc.valueOf("//oaf:dateAccepted"), info));
        instance.setDistributionlocation(doc.valueOf("//oaf:distributionlocation"));
        instance.setAccessright(prepareQualifier(doc, "//oaf:accessrights", DNET_ACCESS_MODES));
        instance.setLicense(field(doc.valueOf("//oaf:license"), info));
        instance.setRefereed(prepareQualifier(doc, "//oaf:refereed", DNET_REVIEW_LEVELS));
        instance.setProcessingchargeamount(field(doc.valueOf("//oaf:processingchargeamount"), info));
        // the currency is an attribute of the amount element
        instance
            .setProcessingchargecurrency(field(doc.valueOf("//oaf:processingchargeamount/@currency"), info));

        final List<Node> nodes = Lists.newArrayList(doc.selectNodes("//dc:identifier"));
        instance
            .setUrl(
                nodes
                    .stream()
                    .filter(n -> StringUtils.isNotBlank(n.getText()))
                    .map(n -> n.getText().trim())
                    .filter(u -> u.startsWith("http"))
                    .distinct()
                    .collect(Collectors.toCollection(ArrayList::new)));

        return Lists.newArrayList(instance);
    }

    /** Collects the {@code //dc:source} elements. */
    @Override
    protected List<Field<String>> prepareSources(final Document doc, final DataInfo info) {
        return prepareListFields(doc, "//dc:source", info);
    }

    /** Always returns a new empty list. */
    @Override
    protected List<StructuredProperty> prepareRelevantDates(final Document doc, final DataInfo info) {
        return new ArrayList<>(); // NOT PRESENT IN OAF
    }

    // SOFTWARES

    /** Always returns {@code null}. */
    @Override
    protected Qualifier prepareSoftwareProgrammingLanguage(final Document doc, final DataInfo info) {
        return null; // NOT PRESENT IN OAF
    }

    /** Always returns {@code null}. */
    @Override
    protected Field<String> prepareSoftwareCodeRepositoryUrl(
        final Document doc,
        final DataInfo info) {
        return null; // NOT PRESENT IN OAF
    }

    /** Always returns a new empty list. */
    @Override
    protected List<StructuredProperty> prepareSoftwareLicenses(
        final Document doc,
        final DataInfo info) {
        return new ArrayList<>(); // NOT PRESENT IN OAF
    }

    /** Always returns a new empty list. */
    @Override
    protected List<Field<String>> prepareSoftwareDocumentationUrls(
        final Document doc,
        final DataInfo info) {
        return new ArrayList<>(); // NOT PRESENT IN OAF
    }

    // DATASETS

    /** Always returns a new empty list. */
    @Override
    protected List<GeoLocation> prepareDatasetGeoLocations(final Document doc, final DataInfo info) {
        return new ArrayList<>(); // NOT PRESENT IN OAF
    }

    /** Always returns {@code null}. */
    @Override
    protected Field<String> prepareDatasetMetadataVersionNumber(
        final Document doc,
        final DataInfo info) {
        return null; // NOT PRESENT IN OAF
    }

    /** Always returns {@code null}. */
    @Override
    protected Field<String> prepareDatasetLastMetadataUpdate(
        final Document doc,
        final DataInfo info) {
        return null; // NOT PRESENT IN OAF
    }

    /** Always returns {@code null}. */
    @Override
    protected Field<String> prepareDatasetVersion(final Document doc, final DataInfo info) {
        return null; // NOT PRESENT IN OAF
    }

    /** Always returns {@code null}. */
    @Override
    protected Field<String> prepareDatasetSize(final Document doc, final DataInfo info) {
        return null; // NOT PRESENT IN OAF
    }

    /** Always returns {@code null}. */
    @Override
    protected Field<String> prepareDatasetDevice(final Document doc, final DataInfo info) {
        return null; // NOT PRESENT IN OAF
    }

    /** Always returns {@code null}. */
    @Override
    protected Field<String> prepareDatasetStorageDate(final Document doc, final DataInfo info) {
        return null; // NOT PRESENT IN OAF
    }

    // OTHER PRODUCTS

    /** Always returns a new empty list. */
    @Override
    protected List<Field<String>> prepareOtherResearchProductTools(
        final Document doc,
        final DataInfo info) {
        return new ArrayList<>(); // NOT PRESENT IN OAF
    }

    /** Always returns a new empty list. */
    @Override
    protected List<Field<String>> prepareOtherResearchProductContactGroups(
        final Document doc,
        final DataInfo info) {
        return new ArrayList<>(); // NOT PRESENT IN OAF
    }

    /** Always returns a new empty list. */
    @Override
    protected List<Field<String>> prepareOtherResearchProductContactPersons(
        final Document doc,
        final DataInfo info) {
        return new ArrayList<>(); // NOT PRESENT IN OAF
    }

    /**
     * Creates the relations towards the datasets referenced by the record.
     *
     * <p>Every element whose local name is {@code relatedDataset} (whatever its namespace) and
     * whose text is not blank yields two {@code IS_RELATED_TO} relations, one in each direction,
     * between the id of {@code entity} and the id obtained from
     * {@code createOpenaireId(50, originalId, false)}.
     *
     * @param doc    the record
     * @param entity the entity built from the record; its id is one end of each relation
     * @return the relations, possibly empty, never {@code null}
     */
    @Override
    protected List<Oaf> addOtherResultRels(
        final Document doc,
        final OafEntity entity) {

        final String docId = entity.getId();
        final List<Oaf> res = new ArrayList<>();

        for (final Object o : doc.selectNodes("//*[local-name()='relatedDataset']")) {
            final String originalId = ((Node) o).getText();
            if (StringUtils.isNotBlank(originalId)) {
                final String otherId = createOpenaireId(50, originalId, false);
                // the same relation class is used for both directions
                res.add(getRelation(docId, otherId, RESULT_RESULT, RELATIONSHIP, IS_RELATED_TO, entity));
                res.add(getRelation(otherId, docId, RESULT_RESULT, RELATIONSHIP, IS_RELATED_TO, entity));
            }
        }
        return res;
    }

    /** Always returns {@code null}. */
    @Override
    protected Qualifier prepareResourceType(final Document doc, final DataInfo info) {
        return null; // NOT PRESENT IN OAF
    }

    /**
     * Collects the {@code //oaf:identifier} elements whose {@code @identifierType} is accepted
     * for the {@code DNET_PID_TYPES} vocabulary, passing each one through
     * {@code CleaningFunctions.normalizePidValue}.
     */
    @Override
    protected List<StructuredProperty> prepareResultPids(final Document doc, final DataInfo info) {
        return prepareListStructPropsWithValidQualifier(
            doc, "//oaf:identifier", "@identifierType", DNET_PID_TYPES, info)
                .stream()
                .map(CleaningFunctions::normalizePidValue)
                .collect(Collectors.toList());
    }
}