towards EOSC datasource profiles #130

Merged
alessia.bardi merged 7 commits from datasource_model_eosc_beta into beta 2021-08-23 11:58:35 +02:00
8 changed files with 278 additions and 364 deletions
Showing only changes of commit 52e2315ba2 - Show all commits

View File

@ -38,8 +38,7 @@ import scala.Tuple2;
/**
* Groups the graph content by entity identifier to ensure ID uniqueness
*/
public class
GroupEntitiesSparkJob {
public class GroupEntitiesSparkJob {
private static final Logger log = LoggerFactory.getLogger(GroupEntitiesSparkJob.class);

View File

@ -22,7 +22,6 @@ import org.apache.spark.util.LongAccumulator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.Maps;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
@ -42,61 +41,51 @@ public class XmlConverterJob {
public static final String schemaLocation = "https://www.openaire.eu/schema/1.0/oaf-1.0.xsd";
/**
 * Job entry point. Loads the argument definitions from the bundled
 * input_params_xml_converter.json resource, parses {@code args}, configures Spark to use the
 * Kryo serializer with the provision model classes registered, then removes the output
 * directory and runs the XML conversion inside {@code runWithSparkSession}.
 *
 * NOTE(review): this span comes from a rendered diff, so several statements appear twice (the
 * pre-change line directly followed by its post-change replacement) - confirm which variant is
 * live before compiling.
 *
 * @param args command-line arguments handed to the ArgumentApplicationParser
 * @throws Exception as declared; nothing is caught in this method
 */
public static void main(String[] args) throws Exception {
public static void main(final String[] args) throws Exception {
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
IOUtils
.toString(
XmlConverterJob.class
.getResourceAsStream(
"/eu/dnetlib/dhp/oa/provision/input_params_xml_converter.json")));
.getResourceAsStream("/eu/dnetlib/dhp/oa/provision/input_params_xml_converter.json")));
parser.parseArgument(args);
// Falls back to TRUE when the isSparkSessionManaged argument is absent.
Boolean isSparkSessionManaged = Optional
final Boolean isSparkSessionManaged = Optional
.ofNullable(parser.get("isSparkSessionManaged"))
.map(Boolean::valueOf)
.orElse(Boolean.TRUE);
log.info("isSparkSessionManaged: {}", isSparkSessionManaged);
String inputPath = parser.get("inputPath");
final String inputPath = parser.get("inputPath");
log.info("inputPath: {}", inputPath);
String outputPath = parser.get("outputPath");
final String outputPath = parser.get("outputPath");
log.info("outputPath: {}", outputPath);
String isLookupUrl = parser.get("isLookupUrl");
final String isLookupUrl = parser.get("isLookupUrl");
log.info("isLookupUrl: {}", isLookupUrl);
// NOTE(review): otherDsTypeId is only consumed by the five-argument convertToXml call below.
String otherDsTypeId = parser.get("otherDsTypeId");
log.info("otherDsTypeId: {}", otherDsTypeId);
SparkConf conf = new SparkConf();
final SparkConf conf = new SparkConf();
conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
conf.registerKryoClasses(ProvisionModelSupport.getModelClasses());
// The output directory is removed first; the context mapper is built from isLookupUrl.
runWithSparkSession(
conf,
isSparkSessionManaged,
spark -> {
removeOutputDir(spark, outputPath);
convertToXml(
spark, inputPath, outputPath, ContextMapper.fromIS(isLookupUrl), otherDsTypeId);
});
runWithSparkSession(conf, isSparkSessionManaged, spark -> {
removeOutputDir(spark, outputPath);
convertToXml(spark, inputPath, outputPath, ContextMapper.fromIS(isLookupUrl));
});
}
private static void convertToXml(
SparkSession spark,
String inputPath,
String outputPath,
ContextMapper contextMapper,
String otherDsTypeId) {
final SparkSession spark,
final String inputPath,
final String outputPath,
final ContextMapper contextMapper) {
final XmlRecordFactory recordFactory = new XmlRecordFactory(
prepareAccumulators(spark.sparkContext()),
contextMapper,
false,
schemaLocation,
otherDsTypeId);
schemaLocation);
final List<String> paths = HdfsSupport
.listFiles(inputPath, spark.sparkContext().hadoopConfiguration());
@ -116,16 +105,15 @@ public class XmlConverterJob {
.mapToPair(
(PairFunction<Tuple2<String, String>, Text, Text>) t -> new Tuple2<>(new Text(t._1()),
new Text(t._2())))
.saveAsHadoopFile(
outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, GzipCodec.class);
.saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, GzipCodec.class);
}
/**
 * Removes {@code path} by delegating to {@code HdfsSupport.remove}, using the Hadoop
 * configuration taken from the Spark context of {@code spark}.
 *
 * NOTE(review): the two signature lines are the pre- and post-change variants from the diff.
 *
 * @param spark session whose Spark context supplies the Hadoop configuration
 * @param path location handed to HdfsSupport.remove
 */
private static void removeOutputDir(SparkSession spark, String path) {
private static void removeOutputDir(final SparkSession spark, final String path) {
HdfsSupport.remove(path, spark.sparkContext().hadoopConfiguration());
}
private static Map<String, LongAccumulator> prepareAccumulators(SparkContext sc) {
Map<String, LongAccumulator> accumulators = Maps.newHashMap();
private static Map<String, LongAccumulator> prepareAccumulators(final SparkContext sc) {
final Map<String, LongAccumulator> accumulators = Maps.newHashMap();
accumulators
.put(
"resultResult_similarity_isAmongTopNSimilarDocuments",
@ -136,15 +124,13 @@ public class XmlConverterJob {
sc.longAccumulator("resultResult_similarity_hasAmongTopNSimilarDocuments"));
accumulators
.put(
"resultResult_supplement_isSupplementTo",
sc.longAccumulator("resultResult_supplement_isSupplementTo"));
"resultResult_supplement_isSupplementTo", sc.longAccumulator("resultResult_supplement_isSupplementTo"));
accumulators
.put(
"resultResult_supplement_isSupplementedBy",
sc.longAccumulator("resultResult_supplement_isSupplementedBy"));
accumulators
.put(
"resultResult_dedup_isMergedIn", sc.longAccumulator("resultResult_dedup_isMergedIn"));
.put("resultResult_dedup_isMergedIn", sc.longAccumulator("resultResult_dedup_isMergedIn"));
accumulators.put("resultResult_dedup_merges", sc.longAccumulator("resultResult_dedup_merges"));
accumulators
@ -152,16 +138,11 @@ public class XmlConverterJob {
"resultResult_publicationDataset_isRelatedTo",
sc.longAccumulator("resultResult_publicationDataset_isRelatedTo"));
accumulators
.put(
"resultResult_relationship_isRelatedTo",
sc.longAccumulator("resultResult_relationship_isRelatedTo"));
.put("resultResult_relationship_isRelatedTo", sc.longAccumulator("resultResult_relationship_isRelatedTo"));
accumulators
.put(
"resultProject_outcome_isProducedBy",
sc.longAccumulator("resultProject_outcome_isProducedBy"));
.put("resultProject_outcome_isProducedBy", sc.longAccumulator("resultProject_outcome_isProducedBy"));
accumulators
.put(
"resultProject_outcome_produces", sc.longAccumulator("resultProject_outcome_produces"));
.put("resultProject_outcome_produces", sc.longAccumulator("resultProject_outcome_produces"));
accumulators
.put(
"resultOrganization_affiliation_isAuthorInstitutionOf",
@ -184,9 +165,7 @@ public class XmlConverterJob {
"organizationOrganization_dedup_isMergedIn",
sc.longAccumulator("organizationOrganization_dedup_isMergedIn"));
accumulators
.put(
"organizationOrganization_dedup_merges",
sc.longAccumulator("resultProject_outcome_produces"));
.put("organizationOrganization_dedup_merges", sc.longAccumulator("resultProject_outcome_produces"));
accumulators
.put(
"datasourceOrganization_provision_isProvidedBy",

View File

@ -1,7 +1,8 @@
package eu.dnetlib.dhp.oa.provision.utils;
import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.*;
import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.authorPidTypes;
import static eu.dnetlib.dhp.oa.provision.utils.GraphMappingUtils.getRelDescriptor;
import static org.apache.commons.lang3.StringUtils.isNotBlank;
import static org.apache.commons.lang3.StringUtils.substringBefore;
@ -9,14 +10,23 @@ import java.io.IOException;
import java.io.Serializable;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.*;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import javax.xml.transform.*;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
import org.apache.commons.lang3.StringUtils;
import org.apache.spark.util.LongAccumulator;
import org.dom4j.Document;
@ -36,19 +46,38 @@ import com.google.common.collect.Sets;
import com.mycila.xmltool.XMLDoc;
import com.mycila.xmltool.XMLTag;
import eu.dnetlib.dhp.oa.provision.model.*;
import eu.dnetlib.dhp.oa.provision.model.JoinedEntity;
import eu.dnetlib.dhp.oa.provision.model.RelatedEntity;
import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper;
import eu.dnetlib.dhp.schema.common.EntityType;
import eu.dnetlib.dhp.schema.common.MainEntityType;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.dhp.schema.oaf.Dataset;
import eu.dnetlib.dhp.schema.oaf.Datasource;
import eu.dnetlib.dhp.schema.oaf.ExternalReference;
import eu.dnetlib.dhp.schema.oaf.ExtraInfo;
import eu.dnetlib.dhp.schema.oaf.Instance;
import eu.dnetlib.dhp.schema.oaf.Journal;
import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.OafEntity;
import eu.dnetlib.dhp.schema.oaf.Organization;
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
import eu.dnetlib.dhp.schema.oaf.Project;
import eu.dnetlib.dhp.schema.oaf.Publication;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.schema.oaf.Software;
import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
public class XmlRecordFactory implements Serializable {
private final Map<String, LongAccumulator> accumulators;
/**
* Serial version UID of this Serializable factory.
*/
private static final long serialVersionUID = 2912912999272373172L;
private final Set<String> specialDatasourceTypes;
private final Map<String, LongAccumulator> accumulators;
private final ContextMapper contextMapper;
@ -61,23 +90,20 @@ public class XmlRecordFactory implements Serializable {
/**
 * Convenience constructor: delegates to the accumulator-taking constructor, passing a new
 * empty map as the accumulators.
 *
 * NOTE(review): the parameter list and the {@code this(...)} call each appear in their
 * pre-change form (with otherDatasourceTypesUForUI) and post-change form (without it).
 *
 * @param contextMapper forwarded unchanged
 * @param indent forwarded unchanged
 * @param schemaLocation forwarded unchanged
 */
public XmlRecordFactory(
final ContextMapper contextMapper,
final boolean indent,
final String schemaLocation,
final String otherDatasourceTypesUForUI) {
final String schemaLocation) {
this(Maps.newHashMap(), contextMapper, indent, schemaLocation, otherDatasourceTypesUForUI);
this(Maps.newHashMap(), contextMapper, indent, schemaLocation);
}
/**
 * Primary constructor: stores the collaborators and output settings in the instance fields.
 *
 * NOTE(review): rendered from a diff. The otherDatasourceTypesUForUI parameter and the
 * specialDatasourceTypes assignment appear to be the pre-change variant - confirm against the
 * merged file.
 *
 * @param accumulators map of named Spark accumulators, stored as given (not copied)
 * @param contextMapper stored as given
 * @param indent stored; elsewhere in this class it selects pretty-print vs compact XML output
 * @param schemaLocation stored; elsewhere in this class it is passed to the record template
 * @param otherDatasourceTypesUForUI comma-separated list, split with trimmed entries into a set
 */
public XmlRecordFactory(
final Map<String, LongAccumulator> accumulators,
final ContextMapper contextMapper,
final boolean indent,
final String schemaLocation,
final String otherDatasourceTypesUForUI) {
final String schemaLocation) {
this.accumulators = accumulators;
this.contextMapper = contextMapper;
this.schemaLocation = schemaLocation;
this.specialDatasourceTypes = Sets.newHashSet(Splitter.on(",").trimResults().split(otherDatasourceTypesUForUI));
this.indent = indent;
}
@ -87,8 +113,8 @@ public class XmlRecordFactory implements Serializable {
final Set<String> contexts = Sets.newHashSet();
// final OafEntity entity = toOafEntity(je.getEntity());
OafEntity entity = je.getEntity();
TemplateFactory templateFactory = new TemplateFactory();
final OafEntity entity = je.getEntity();
final TemplateFactory templateFactory = new TemplateFactory();
try {
final EntityType type = EntityType.fromClass(entity.getClass());
@ -110,11 +136,7 @@ public class XmlRecordFactory implements Serializable {
final String body = templateFactory
.buildBody(
mainType,
metadata,
relations,
listChildren(entity, je, templateFactory),
listExtraInfo(entity));
mainType, metadata, relations, listChildren(entity, je, templateFactory), listExtraInfo(entity));
return printXML(templateFactory.buildRecord(entity, schemaLocation, body), indent);
} catch (final Throwable e) {
@ -142,19 +164,19 @@ public class XmlRecordFactory implements Serializable {
default:
throw new IllegalArgumentException("invalid type: " + type);
}
} catch (IOException e) {
} catch (final IOException e) {
throw new IllegalArgumentException(e);
}
}
private String printXML(String xml, boolean indent) {
private String printXML(final String xml, final boolean indent) {
try {
final Document doc = new SAXReader().read(new StringReader(xml));
OutputFormat format = indent ? OutputFormat.createPrettyPrint() : OutputFormat.createCompactFormat();
final OutputFormat format = indent ? OutputFormat.createPrettyPrint() : OutputFormat.createCompactFormat();
format.setExpandEmptyElements(false);
format.setSuppressDeclaration(true);
StringWriter sw = new StringWriter();
XMLWriter writer = new XMLWriter(sw, format);
final StringWriter sw = new StringWriter();
final XMLWriter writer = new XMLWriter(sw, format);
writer.write(doc);
return sw.toString();
} catch (IOException | DocumentException e) {
@ -163,7 +185,9 @@ public class XmlRecordFactory implements Serializable {
}
private List<String> metadata(
final EntityType type, final OafEntity entity, final Set<String> contexts) {
final EntityType type,
final OafEntity entity,
final Set<String> contexts) {
final List<String> metadata = Lists.newArrayList();
@ -230,72 +254,63 @@ public class XmlRecordFactory implements Serializable {
.getAuthor()
.stream()
.filter(Objects::nonNull)
.map(
a -> {
final StringBuilder sb = new StringBuilder("<creator rank=\"" + a.getRank() + "\"");
if (isNotBlank(a.getName())) {
sb.append(" name=\"" + XmlSerializationUtils.escapeXml(a.getName()) + "\"");
}
if (isNotBlank(a.getSurname())) {
sb
.append(
" surname=\"" + XmlSerializationUtils.escapeXml(a.getSurname()) + "\"");
}
if (a.getPid() != null) {
a
.getPid()
.stream()
.filter(Objects::nonNull)
.filter(
sp -> isNotBlank(sp.getQualifier().getClassid())
&& isNotBlank(sp.getValue()))
.collect(
Collectors
.toMap(
p -> getAuthorPidType(p.getQualifier().getClassid()),
p -> p,
(p1, p2) -> p1))
.values()
.stream()
.collect(
Collectors
.groupingBy(
p -> p.getValue(),
Collectors
.mapping(
p -> p,
Collectors.minBy(new AuthorPidTypeComparator()))))
.values()
.stream()
.map(op -> op.get())
.forEach(
sp -> {
String pidType = getAuthorPidType(sp.getQualifier().getClassid());
String pidValue = XmlSerializationUtils.escapeXml(sp.getValue());
// ugly hack: some records provide swapped pidtype and pidvalue
if (authorPidTypes.contains(pidValue.toLowerCase().trim())) {
sb.append(String.format(" %s=\"%s\"", pidValue, pidType));
} else {
if (isNotBlank(pidType)) {
sb
.append(
String
.format(
" %s=\"%s\"",
pidType,
pidValue
.toLowerCase()
.replaceAll("^.*orcid\\.org\\/", "")));
}
}
});
}
.map(a -> {
final StringBuilder sb = new StringBuilder("<creator rank=\"" + a.getRank() + "\"");
if (isNotBlank(a.getName())) {
sb.append(" name=\"" + XmlSerializationUtils.escapeXml(a.getName()) + "\"");
}
if (isNotBlank(a.getSurname())) {
sb
.append(
">" + XmlSerializationUtils.escapeXml(a.getFullname()) + "</creator>");
return sb.toString();
})
.append(" surname=\"" + XmlSerializationUtils.escapeXml(a.getSurname()) + "\"");
}
if (a.getPid() != null) {
a
.getPid()
.stream()
.filter(Objects::nonNull)
.filter(
sp -> isNotBlank(sp.getQualifier().getClassid())
&& isNotBlank(sp.getValue()))
.collect(
Collectors
.toMap(
p -> getAuthorPidType(p.getQualifier().getClassid()), p -> p,
(p1, p2) -> p1))
.values()
.stream()
.collect(
Collectors
.groupingBy(
p -> p.getValue(), Collectors
.mapping(
p -> p, Collectors.minBy(new AuthorPidTypeComparator()))))
.values()
.stream()
.map(op -> op.get())
.forEach(sp -> {
final String pidType = getAuthorPidType(sp.getQualifier().getClassid());
final String pidValue = XmlSerializationUtils.escapeXml(sp.getValue());
// ugly hack: some records provide swapped pidtype and pidvalue
if (authorPidTypes.contains(pidValue.toLowerCase().trim())) {
sb.append(String.format(" %s=\"%s\"", pidValue, pidType));
} else {
if (isNotBlank(pidType)) {
sb
.append(
String
.format(
" %s=\"%s\"", pidType, pidValue
.toLowerCase()
.replaceAll("^.*orcid\\.org\\/", "")));
}
}
});
}
sb
.append(">" + XmlSerializationUtils.escapeXml(a.getFullname()) + "</creator>");
return sb.toString();
})
.collect(Collectors.toList()));
}
if (r.getContributor() != null) {
@ -332,8 +347,7 @@ public class XmlRecordFactory implements Serializable {
metadata
.add(
XmlSerializationUtils
.asXmlElement(
"dateofacceptance", r.getDateofacceptance().getValue()));
.asXmlElement("dateofacceptance", r.getDateofacceptance().getValue()));
}
if (r.getDescription() != null) {
metadata
@ -347,8 +361,7 @@ public class XmlRecordFactory implements Serializable {
}
if (r.getEmbargoenddate() != null) {
metadata
.add(
XmlSerializationUtils.asXmlElement("embargoenddate", r.getEmbargoenddate().getValue()));
.add(XmlSerializationUtils.asXmlElement("embargoenddate", r.getEmbargoenddate().getValue()));
}
if (r.getSubject() != null) {
metadata
@ -423,23 +436,20 @@ public class XmlRecordFactory implements Serializable {
metadata
.add(
XmlSerializationUtils
.asXmlElement(
"lastmetadataupdate", d.getLastmetadataupdate().getValue()));
.asXmlElement("lastmetadataupdate", d.getLastmetadataupdate().getValue()));
}
if (d.getMetadataversionnumber() != null) {
metadata
.add(
XmlSerializationUtils
.asXmlElement(
"metadataversionnumber", d.getMetadataversionnumber().getValue()));
.asXmlElement("metadataversionnumber", d.getMetadataversionnumber().getValue()));
}
if (d.getSize() != null) {
metadata.add(XmlSerializationUtils.asXmlElement("size", d.getSize().getValue()));
}
if (d.getStoragedate() != null) {
metadata
.add(
XmlSerializationUtils.asXmlElement("storagedate", d.getStoragedate().getValue()));
.add(XmlSerializationUtils.asXmlElement("storagedate", d.getStoragedate().getValue()));
}
if (d.getVersion() != null) {
metadata.add(XmlSerializationUtils.asXmlElement("version", d.getVersion().getValue()));
@ -509,98 +519,87 @@ public class XmlRecordFactory implements Serializable {
metadata
.add(
XmlSerializationUtils
.asXmlElement(
"codeRepositoryUrl", s.getCodeRepositoryUrl().getValue()));
.asXmlElement("codeRepositoryUrl", s.getCodeRepositoryUrl().getValue()));
}
if (s.getProgrammingLanguage() != null) {
metadata
.add(
XmlSerializationUtils
.mapQualifier(
"programmingLanguage", s.getProgrammingLanguage()));
.mapQualifier("programmingLanguage", s.getProgrammingLanguage()));
}
break;
case datasource:
final Datasource ds = (Datasource) entity;
if (ds.getDatasourcetype() != null) {
mapDatasourceType(metadata, ds.getDatasourcetype());
metadata.add(XmlSerializationUtils.mapQualifier("datasourcetype", ds.getDatasourcetype()));
}
if (ds.getDatasourcetypeui() != null) {
metadata.add(XmlSerializationUtils.mapQualifier("datasourcetypeui", ds.getDatasourcetypeui()));
}
if (ds.getOpenairecompatibility() != null) {
metadata
.add(
XmlSerializationUtils
.mapQualifier(
"openairecompatibility", ds.getOpenairecompatibility()));
.mapQualifier("openairecompatibility", ds.getOpenairecompatibility()));
}
if (ds.getOfficialname() != null) {
metadata
.add(
XmlSerializationUtils.asXmlElement("officialname", ds.getOfficialname().getValue()));
.add(XmlSerializationUtils.asXmlElement("officialname", ds.getOfficialname().getValue()));
}
if (ds.getEnglishname() != null) {
metadata
.add(
XmlSerializationUtils.asXmlElement("englishname", ds.getEnglishname().getValue()));
.add(XmlSerializationUtils.asXmlElement("englishname", ds.getEnglishname().getValue()));
}
if (ds.getWebsiteurl() != null) {
metadata
.add(
XmlSerializationUtils.asXmlElement("websiteurl", ds.getWebsiteurl().getValue()));
.add(XmlSerializationUtils.asXmlElement("websiteurl", ds.getWebsiteurl().getValue()));
}
if (ds.getLogourl() != null) {
metadata.add(XmlSerializationUtils.asXmlElement("logourl", ds.getLogourl().getValue()));
}
if (ds.getContactemail() != null) {
metadata
.add(
XmlSerializationUtils.asXmlElement("contactemail", ds.getContactemail().getValue()));
.add(XmlSerializationUtils.asXmlElement("contactemail", ds.getContactemail().getValue()));
}
if (ds.getNamespaceprefix() != null) {
metadata
.add(
XmlSerializationUtils
.asXmlElement(
"namespaceprefix", ds.getNamespaceprefix().getValue()));
.asXmlElement("namespaceprefix", ds.getNamespaceprefix().getValue()));
}
if (ds.getLatitude() != null) {
metadata.add(XmlSerializationUtils.asXmlElement("latitude", ds.getLatitude().getValue()));
}
if (ds.getLongitude() != null) {
metadata
.add(
XmlSerializationUtils.asXmlElement("longitude", ds.getLongitude().getValue()));
.add(XmlSerializationUtils.asXmlElement("longitude", ds.getLongitude().getValue()));
}
if (ds.getDateofvalidation() != null) {
metadata
.add(
XmlSerializationUtils
.asXmlElement(
"dateofvalidation", ds.getDateofvalidation().getValue()));
.asXmlElement("dateofvalidation", ds.getDateofvalidation().getValue()));
}
if (ds.getDescription() != null) {
metadata
.add(
XmlSerializationUtils.asXmlElement("description", ds.getDescription().getValue()));
.add(XmlSerializationUtils.asXmlElement("description", ds.getDescription().getValue()));
}
if (ds.getOdnumberofitems() != null) {
metadata
.add(
XmlSerializationUtils
.asXmlElement(
"odnumberofitems", ds.getOdnumberofitems().getValue()));
.asXmlElement("odnumberofitems", ds.getOdnumberofitems().getValue()));
}
if (ds.getOdnumberofitemsdate() != null) {
metadata
.add(
XmlSerializationUtils
.asXmlElement(
"odnumberofitemsdate", ds.getOdnumberofitemsdate().getValue()));
.asXmlElement("odnumberofitemsdate", ds.getOdnumberofitemsdate().getValue()));
}
if (ds.getOdpolicies() != null) {
metadata
.add(
XmlSerializationUtils.asXmlElement("odpolicies", ds.getOdpolicies().getValue()));
.add(XmlSerializationUtils.asXmlElement("odpolicies", ds.getOdpolicies().getValue()));
}
if (ds.getOdlanguages() != null) {
metadata
@ -635,50 +634,43 @@ public class XmlRecordFactory implements Serializable {
metadata
.add(
XmlSerializationUtils
.asXmlElement(
"releasestartdate", ds.getReleaseenddate().getValue()));
.asXmlElement("releasestartdate", ds.getReleaseenddate().getValue()));
}
if (ds.getReleaseenddate() != null) {
metadata
.add(
XmlSerializationUtils
.asXmlElement(
"releaseenddate", ds.getReleaseenddate().getValue()));
.asXmlElement("releaseenddate", ds.getReleaseenddate().getValue()));
}
if (ds.getMissionstatementurl() != null) {
metadata
.add(
XmlSerializationUtils
.asXmlElement(
"missionstatementurl", ds.getMissionstatementurl().getValue()));
.asXmlElement("missionstatementurl", ds.getMissionstatementurl().getValue()));
}
if (ds.getDataprovider() != null) {
metadata
.add(
XmlSerializationUtils
.asXmlElement(
"dataprovider", ds.getDataprovider().getValue().toString()));
.asXmlElement("dataprovider", ds.getDataprovider().getValue().toString()));
}
if (ds.getServiceprovider() != null) {
metadata
.add(
XmlSerializationUtils
.asXmlElement(
"serviceprovider", ds.getServiceprovider().getValue().toString()));
.asXmlElement("serviceprovider", ds.getServiceprovider().getValue().toString()));
}
if (ds.getDatabaseaccesstype() != null) {
metadata
.add(
XmlSerializationUtils
.asXmlElement(
"databaseaccesstype", ds.getDatabaseaccesstype().getValue()));
.asXmlElement("databaseaccesstype", ds.getDatabaseaccesstype().getValue()));
}
if (ds.getDatauploadtype() != null) {
metadata
.add(
XmlSerializationUtils
.asXmlElement(
"datauploadtype", ds.getDatauploadtype().getValue()));
.asXmlElement("datauploadtype", ds.getDatauploadtype().getValue()));
}
if (ds.getDatabaseaccessrestriction() != null) {
metadata
@ -691,39 +683,33 @@ public class XmlRecordFactory implements Serializable {
metadata
.add(
XmlSerializationUtils
.asXmlElement(
"datauploadrestriction", ds.getDatauploadrestriction().getValue()));
.asXmlElement("datauploadrestriction", ds.getDatauploadrestriction().getValue()));
}
if (ds.getVersioning() != null) {
metadata
.add(
XmlSerializationUtils
.asXmlElement(
"versioning", ds.getVersioning().getValue().toString()));
.asXmlElement("versioning", ds.getVersioning().getValue().toString()));
}
if (ds.getCitationguidelineurl() != null) {
metadata
.add(
XmlSerializationUtils
.asXmlElement(
"citationguidelineurl", ds.getCitationguidelineurl().getValue()));
.asXmlElement("citationguidelineurl", ds.getCitationguidelineurl().getValue()));
}
if (ds.getQualitymanagementkind() != null) {
metadata
.add(
XmlSerializationUtils
.asXmlElement(
"qualitymanagementkind", ds.getQualitymanagementkind().getValue()));
.asXmlElement("qualitymanagementkind", ds.getQualitymanagementkind().getValue()));
}
if (ds.getPidsystems() != null) {
metadata
.add(
XmlSerializationUtils.asXmlElement("pidsystems", ds.getPidsystems().getValue()));
.add(XmlSerializationUtils.asXmlElement("pidsystems", ds.getPidsystems().getValue()));
}
if (ds.getCertificates() != null) {
metadata
.add(
XmlSerializationUtils.asXmlElement("certificates", ds.getCertificates().getValue()));
.add(XmlSerializationUtils.asXmlElement("certificates", ds.getCertificates().getValue()));
}
if (ds.getPolicies() != null) {
metadata
@ -757,13 +743,11 @@ public class XmlRecordFactory implements Serializable {
metadata
.add(
XmlSerializationUtils
.asXmlElement(
"legalshortname", o.getLegalshortname().getValue()));
.asXmlElement("legalshortname", o.getLegalshortname().getValue()));
}
if (o.getLegalname() != null) {
metadata
.add(
XmlSerializationUtils.asXmlElement("legalname", o.getLegalname().getValue()));
.add(XmlSerializationUtils.asXmlElement("legalname", o.getLegalname().getValue()));
}
if (o.getAlternativeNames() != null) {
metadata
@ -777,8 +761,7 @@ public class XmlRecordFactory implements Serializable {
}
if (o.getWebsiteurl() != null) {
metadata
.add(
XmlSerializationUtils.asXmlElement("websiteurl", o.getWebsiteurl().getValue()));
.add(XmlSerializationUtils.asXmlElement("websiteurl", o.getWebsiteurl().getValue()));
}
if (o.getLogourl() != null) {
metadata.add(XmlSerializationUtils.asXmlElement("logourl", o.getLogourl().getValue()));
@ -786,32 +769,27 @@ public class XmlRecordFactory implements Serializable {
if (o.getEclegalbody() != null) {
metadata
.add(
XmlSerializationUtils.asXmlElement("eclegalbody", o.getEclegalbody().getValue()));
.add(XmlSerializationUtils.asXmlElement("eclegalbody", o.getEclegalbody().getValue()));
}
if (o.getEclegalperson() != null) {
metadata
.add(
XmlSerializationUtils.asXmlElement("eclegalperson", o.getEclegalperson().getValue()));
.add(XmlSerializationUtils.asXmlElement("eclegalperson", o.getEclegalperson().getValue()));
}
if (o.getEcnonprofit() != null) {
metadata
.add(
XmlSerializationUtils.asXmlElement("ecnonprofit", o.getEcnonprofit().getValue()));
.add(XmlSerializationUtils.asXmlElement("ecnonprofit", o.getEcnonprofit().getValue()));
}
if (o.getEcresearchorganization() != null) {
metadata
.add(
XmlSerializationUtils
.asXmlElement(
"ecresearchorganization", o.getEcresearchorganization().getValue()));
.asXmlElement("ecresearchorganization", o.getEcresearchorganization().getValue()));
}
if (o.getEchighereducation() != null) {
metadata
.add(
XmlSerializationUtils
.asXmlElement(
"echighereducation", o.getEchighereducation().getValue()));
.asXmlElement("echighereducation", o.getEchighereducation().getValue()));
}
if (o.getEcinternationalorganizationeurinterests() != null) {
metadata
@ -830,20 +808,17 @@ public class XmlRecordFactory implements Serializable {
}
if (o.getEcenterprise() != null) {
metadata
.add(
XmlSerializationUtils.asXmlElement("ecenterprise", o.getEcenterprise().getValue()));
.add(XmlSerializationUtils.asXmlElement("ecenterprise", o.getEcenterprise().getValue()));
}
if (o.getEcsmevalidated() != null) {
metadata
.add(
XmlSerializationUtils
.asXmlElement(
"ecsmevalidated", o.getEcsmevalidated().getValue()));
.asXmlElement("ecsmevalidated", o.getEcsmevalidated().getValue()));
}
if (o.getEcnutscode() != null) {
metadata
.add(
XmlSerializationUtils.asXmlElement("ecnutscode", o.getEcnutscode().getValue()));
.add(XmlSerializationUtils.asXmlElement("ecnutscode", o.getEcnutscode().getValue()));
}
if (o.getCountry() != null) {
metadata.add(XmlSerializationUtils.mapQualifier("country", o.getCountry()));
@ -855,8 +830,7 @@ public class XmlRecordFactory implements Serializable {
if (p.getWebsiteurl() != null) {
metadata
.add(
XmlSerializationUtils.asXmlElement("websiteurl", p.getWebsiteurl().getValue()));
.add(XmlSerializationUtils.asXmlElement("websiteurl", p.getWebsiteurl().getValue()));
}
if (p.getCode() != null) {
metadata.add(XmlSerializationUtils.asXmlElement("code", p.getCode().getValue()));
@ -869,8 +843,7 @@ public class XmlRecordFactory implements Serializable {
}
if (p.getStartdate() != null) {
metadata
.add(
XmlSerializationUtils.asXmlElement("startdate", p.getStartdate().getValue()));
.add(XmlSerializationUtils.asXmlElement("startdate", p.getStartdate().getValue()));
}
if (p.getEnddate() != null) {
metadata.add(XmlSerializationUtils.asXmlElement("enddate", p.getEnddate().getValue()));
@ -879,8 +852,7 @@ public class XmlRecordFactory implements Serializable {
metadata
.add(
XmlSerializationUtils
.asXmlElement(
"callidentifier", p.getCallidentifier().getValue()));
.asXmlElement("callidentifier", p.getCallidentifier().getValue()));
}
if (p.getKeywords() != null) {
metadata.add(XmlSerializationUtils.asXmlElement("keywords", p.getKeywords().getValue()));
@ -890,8 +862,7 @@ public class XmlRecordFactory implements Serializable {
}
if (p.getEcarticle29_3() != null) {
metadata
.add(
XmlSerializationUtils.asXmlElement("ecarticle29_3", p.getEcarticle29_3().getValue()));
.add(XmlSerializationUtils.asXmlElement("ecarticle29_3", p.getEcarticle29_3().getValue()));
}
if (p.getSubjects() != null) {
metadata
@ -923,13 +894,11 @@ public class XmlRecordFactory implements Serializable {
}
if (p.getTotalcost() != null) {
metadata
.add(
XmlSerializationUtils.asXmlElement("totalcost", p.getTotalcost().toString()));
.add(XmlSerializationUtils.asXmlElement("totalcost", p.getTotalcost().toString()));
}
if (p.getFundedamount() != null) {
metadata
.add(
XmlSerializationUtils.asXmlElement("fundedamount", p.getFundedamount().toString()));
.add(XmlSerializationUtils.asXmlElement("fundedamount", p.getFundedamount().toString()));
}
if (p.getFundingtree() != null) {
metadata
@ -950,28 +919,18 @@ public class XmlRecordFactory implements Serializable {
return metadata;
}
/**
 * Normalises an author pid type: XML-escapes {@code s}, then strips every non-word character
 * and, after that, every digit.
 *
 * NOTE(review): the two signature lines are the pre- and post-change variants from the diff.
 *
 * @param s raw pid type identifier; a null value is passed straight to escapeXml
 * @return the escaped string with all {@code \W} and {@code \d} matches removed
 */
private String getAuthorPidType(String s) {
private String getAuthorPidType(final String s) {
return XmlSerializationUtils
.escapeXml(s)
.replaceAll("\\W", "")
.replaceAll("\\d", "");
}
/**
 * Tells whether {@code kv} is usable: it must be non-null and both its key and its value must
 * be non-blank according to {@code StringUtils.isNotBlank}.
 *
 * NOTE(review): the two signature lines are the pre- and post-change variants from the diff.
 *
 * @param kv key/value pair to check, may be null
 * @return true only when kv, its key and its value are all present and non-blank
 */
private static boolean kvNotBlank(KeyValue kv) {
private static boolean kvNotBlank(final KeyValue kv) {
return kv != null && StringUtils.isNotBlank(kv.getKey()) && StringUtils.isNotBlank(kv.getValue());
}
/**
 * Appends two serialized qualifiers to {@code metadata}: first "datasourcetype" built from
 * {@code dsType} as received, then "datasourcetypeui" built from the same object.
 *
 * Between the two additions, when the class id is contained in specialDatasourceTypes, both
 * class id and class name of {@code dsType} are overwritten with "other". This mutates the
 * caller's Qualifier in place.
 *
 * NOTE(review): this looks like the pre-change helper that the commit removes in favour of
 * separate datasourcetype / datasourcetypeui fields - confirm against the merged file.
 *
 * @param metadata list that receives the two serialized elements
 * @param dsType datasource type qualifier; dereferenced without a null check
 */
private void mapDatasourceType(List<String> metadata, final Qualifier dsType) {
metadata.add(XmlSerializationUtils.mapQualifier("datasourcetype", dsType));
if (specialDatasourceTypes.contains(dsType.getClassid())) {
dsType.setClassid("other");
dsType.setClassname("other");
}
metadata.add(XmlSerializationUtils.mapQualifier("datasourcetypeui", dsType));
}
private List<String> mapFields(RelatedEntityWrapper link, Set<String> contexts) {
private List<String> mapFields(final RelatedEntityWrapper link, final Set<String> contexts) {
final Relation rel = link.getRelation();
final RelatedEntity re = link.getTarget();
final String targetType = link.getTarget().getType();
@ -987,16 +946,14 @@ public class XmlRecordFactory implements Serializable {
}
if (isNotBlank(re.getDateofacceptance())) {
metadata
.add(
XmlSerializationUtils.asXmlElement("dateofacceptance", re.getDateofacceptance()));
.add(XmlSerializationUtils.asXmlElement("dateofacceptance", re.getDateofacceptance()));
}
if (isNotBlank(re.getPublisher())) {
metadata.add(XmlSerializationUtils.asXmlElement("publisher", re.getPublisher()));
}
if (isNotBlank(re.getCodeRepositoryUrl())) {
metadata
.add(
XmlSerializationUtils.asXmlElement("coderepositoryurl", re.getCodeRepositoryUrl()));
.add(XmlSerializationUtils.asXmlElement("coderepositoryurl", re.getCodeRepositoryUrl()));
}
if (re.getResulttype() != null && re.getResulttype().isBlank()) {
metadata.add(XmlSerializationUtils.mapQualifier("resulttype", re.getResulttype()));
@ -1026,14 +983,16 @@ public class XmlRecordFactory implements Serializable {
metadata.add(XmlSerializationUtils.asXmlElement("officialname", re.getOfficialname()));
}
if (re.getDatasourcetype() != null && !re.getDatasourcetype().isBlank()) {
mapDatasourceType(metadata, re.getDatasourcetype());
metadata.add(XmlSerializationUtils.mapQualifier("datasourcetype", re.getDatasourcetype()));
}
if (re.getDatasourcetypeui() != null && !re.getDatasourcetypeui().isBlank()) {
metadata.add(XmlSerializationUtils.mapQualifier("datasourcetypeui", re.getDatasourcetypeui()));
}
if (re.getOpenairecompatibility() != null && !re.getOpenairecompatibility().isBlank()) {
metadata
.add(
XmlSerializationUtils
.mapQualifier(
"openairecompatibility", re.getOpenairecompatibility()));
.mapQualifier("openairecompatibility", re.getOpenairecompatibility()));
}
break;
case organization:
@ -1042,8 +1001,7 @@ public class XmlRecordFactory implements Serializable {
}
if (isNotBlank(re.getLegalshortname())) {
metadata
.add(
XmlSerializationUtils.asXmlElement("legalshortname", re.getLegalshortname()));
.add(XmlSerializationUtils.asXmlElement("legalshortname", re.getLegalshortname()));
}
if (re.getCountry() != null && !re.getCountry().isBlank()) {
metadata.add(XmlSerializationUtils.mapQualifier("country", re.getCountry()));
@ -1085,8 +1043,10 @@ public class XmlRecordFactory implements Serializable {
return metadata;
}
private String mapRelation(Set<String> contexts, TemplateFactory templateFactory, EntityType type,
RelatedEntityWrapper link) {
private String mapRelation(final Set<String> contexts,
final TemplateFactory templateFactory,
final EntityType type,
final RelatedEntityWrapper link) {
final Relation rel = link.getRelation();
final String targetType = link.getTarget().getType();
final String scheme = ModelSupport.getScheme(type.toString(), targetType);
@ -1096,8 +1056,9 @@ public class XmlRecordFactory implements Serializable {
String.format("missing scheme for: <%s - %s>", type, targetType));
}
final HashSet<String> fields = Sets.newHashSet(mapFields(link, contexts));
if (rel.getValidated() == null)
if (rel.getValidated() == null) {
rel.setValidated(false);
}
return templateFactory
.getRel(
targetType, rel.getTarget(), fields, rel.getRelClass(), scheme, rel.getDataInfo(), rel.getValidated(),
@ -1105,12 +1066,14 @@ public class XmlRecordFactory implements Serializable {
}
private List<String> listChildren(
final OafEntity entity, JoinedEntity je, TemplateFactory templateFactory) {
final OafEntity entity,
final JoinedEntity je,
final TemplateFactory templateFactory) {
final EntityType entityType = EntityType.fromClass(je.getEntity().getClass());
final List<RelatedEntityWrapper> links = je.getLinks();
List<String> children = links
final List<String> children = links
.stream()
.filter(link -> isDuplicate(link))
.map(link -> {
@ -1131,13 +1094,11 @@ public class XmlRecordFactory implements Serializable {
if (instance.getAccessright() != null && !instance.getAccessright().isBlank()) {
fields
.add(
XmlSerializationUtils.mapQualifier("accessright", instance.getAccessright()));
.add(XmlSerializationUtils.mapQualifier("accessright", instance.getAccessright()));
}
if (instance.getCollectedfrom() != null && kvNotBlank(instance.getCollectedfrom())) {
fields
.add(
XmlSerializationUtils.mapKeyValue("collectedfrom", instance.getCollectedfrom()));
.add(XmlSerializationUtils.mapKeyValue("collectedfrom", instance.getCollectedfrom()));
}
if (instance.getHostedby() != null && kvNotBlank(instance.getHostedby())) {
fields.add(XmlSerializationUtils.mapKeyValue("hostedby", instance.getHostedby()));
@ -1147,20 +1108,17 @@ public class XmlRecordFactory implements Serializable {
fields
.add(
XmlSerializationUtils
.asXmlElement(
"dateofacceptance", instance.getDateofacceptance().getValue()));
.asXmlElement("dateofacceptance", instance.getDateofacceptance().getValue()));
}
if (instance.getInstancetype() != null && !instance.getInstancetype().isBlank()) {
fields
.add(
XmlSerializationUtils.mapQualifier("instancetype", instance.getInstancetype()));
.add(XmlSerializationUtils.mapQualifier("instancetype", instance.getInstancetype()));
}
if (isNotBlank(instance.getDistributionlocation())) {
fields
.add(
XmlSerializationUtils
.asXmlElement(
"distributionlocation", instance.getDistributionlocation()));
.asXmlElement("distributionlocation", instance.getDistributionlocation()));
}
if (instance.getPid() != null) {
fields
@ -1185,8 +1143,7 @@ public class XmlRecordFactory implements Serializable {
if (instance.getRefereed() != null && !instance.getRefereed().isBlank()) {
fields
.add(
XmlSerializationUtils.mapQualifier("refereed", instance.getRefereed()));
.add(XmlSerializationUtils.mapQualifier("refereed", instance.getRefereed()));
}
if (instance.getProcessingchargeamount() != null
&& isNotBlank(instance.getProcessingchargeamount().getValue())) {
@ -1208,8 +1165,7 @@ public class XmlRecordFactory implements Serializable {
children
.add(
templateFactory
.getInstance(
instance.getHostedby().getKey(), fields, instance.getUrl()));
.getInstance(instance.getHostedby().getKey(), fields, instance.getUrl()));
}
}
final List<ExternalReference> ext = ((Result) entity).getExternalReference();
@ -1254,11 +1210,11 @@ public class XmlRecordFactory implements Serializable {
return children;
}
/**
 * Tells whether the given link is a deduplication link.
 *
 * @param link the related-entity wrapper whose relation is inspected
 * @return {@code true} when the sub-relation type of the link's relation equals
 *         {@code ModelConstants.DEDUP}, ignoring case
 */
private boolean isDuplicate(RelatedEntityWrapper link) {
private boolean isDuplicate(final RelatedEntityWrapper link) {
// The constant is the receiver of equalsIgnoreCase, so a null sub-relation type yields false.
return ModelConstants.DEDUP.equalsIgnoreCase(link.getRelation().getSubRelType());
}
private List<String> listExtraInfo(OafEntity entity) {
private List<String> listExtraInfo(final OafEntity entity) {
final List<ExtraInfo> extraInfo = entity.getExtraInfo();
return extraInfo != null
? extraInfo
@ -1271,7 +1227,7 @@ public class XmlRecordFactory implements Serializable {
private List<String> buildContexts(final String type, final Set<String> contexts) {
final List<String> res = Lists.newArrayList();
if ((contextMapper != null)
if (contextMapper != null
&& !contextMapper.isEmpty()
&& MainEntityType.result.toString().equals(type)) {
@ -1302,8 +1258,7 @@ public class XmlRecordFactory implements Serializable {
if (def.getName().equals("category")) {
final String rootId = substringBefore(def.getId(), "::");
document = addContextDef(
document.gotoRoot().gotoTag("//context[./@id='" + rootId + "']", new Object()),
def);
document.gotoRoot().gotoTag("//context[./@id='" + rootId + "']", new Object()), def);
}
if (def.getName().equals("concept")) {
@ -1327,17 +1282,17 @@ public class XmlRecordFactory implements Serializable {
/**
 * Creates a new {@link Transformer} configured to omit the XML declaration from its output.
 *
 * @return a freshly created transformer with {@code OutputKeys.OMIT_XML_DECLARATION} set to "yes"
 * @throws IllegalStateException if the transformer cannot be created; the original
 *         {@link TransformerConfigurationException} is preserved as the cause
 */
private Transformer getTransformer() {
try {
Transformer transformer = TransformerFactory.newInstance().newTransformer();
final Transformer transformer = TransformerFactory.newInstance().newTransformer();
transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
return transformer;
} catch (TransformerConfigurationException e) {
} catch (final TransformerConfigurationException e) {
// Rethrown unchecked, so callers of this method do not have to declare the checked exception.
throw new IllegalStateException("unable to create javax.xml.transform.Transformer", e);
}
}
private XMLTag addContextDef(final XMLTag tag, final ContextDef def) {
tag.addTag(def.getName()).addAttribute("id", def.getId()).addAttribute("label", def.getLabel());
if ((def.getType() != null) && !def.getType().isEmpty()) {
if (def.getType() != null && !def.getType().isEmpty()) {
tag.addAttribute("type", def.getType());
}
return tag;
@ -1374,16 +1329,14 @@ public class XmlRecordFactory implements Serializable {
if (level0 != null) {
final String level0Id = Joiner.on("::").join(funderShortName, level0.valueOf("./name"));
contextMapper
.put(
level0Id, new ContextDef(level0Id, level0.valueOf("./description"), "category", ""));
.put(level0Id, new ContextDef(level0Id, level0.valueOf("./description"), "category", ""));
final Node level1 = fundingPath.selectSingleNode("//funding_level_1");
if (level1 == null) {
contexts.add(level0Id);
} else {
final String level1Id = Joiner.on("::").join(level0Id, level1.valueOf("./name"));
contextMapper
.put(
level1Id, new ContextDef(level1Id, level1.valueOf("./description"), "concept", ""));
.put(level1Id, new ContextDef(level1Id, level1.valueOf("./description"), "concept", ""));
final Node level2 = fundingPath.selectSingleNode("//funding_level_2");
if (level2 == null) {
contexts.add(level1Id);
@ -1391,8 +1344,7 @@ public class XmlRecordFactory implements Serializable {
final String level2Id = Joiner.on("::").join(level1Id, level2.valueOf("./name"));
contextMapper
.put(
level2Id,
new ContextDef(level2Id, level2.valueOf("./description"), "concept", ""));
level2Id, new ContextDef(level2Id, level2.valueOf("./description"), "concept", ""));
contexts.add(level2Id);
}
}
@ -1413,8 +1365,7 @@ public class XmlRecordFactory implements Serializable {
funding += getFunderElement(ftree);
for (final Object o : Lists
.reverse(
ftree.selectNodes("//fundingtree//*[starts-with(local-name(),'funding_level_')]"))) {
.reverse(ftree.selectNodes("//fundingtree//*[starts-with(local-name(),'funding_level_')]"))) {
final Element e = (Element) o;
final String _id = e.valueOf("./id");
funding += "<"

View File

@ -16,11 +16,5 @@
"paramLongName": "isLookupUrl",
"paramDescription": "URL of the isLookUp Service",
"paramRequired": true
},
{
"paramName": "odt",
"paramLongName": "otherDsTypeId",
"paramDescription": "list of datasource types to populate field datasourcetypeui",
"paramRequired": true
}
]

View File

@ -25,10 +25,6 @@
<name>targetMaxRelations</name>
<description>maximum number of relations allowed for each entity, grouping by target</description>
</property>
<property>
<name>otherDsTypeId</name>
<description>mapping used to populate datasourceTypeUi field</description>
</property>
<property>
<name>format</name>
<description>metadata format name (DMF|TMF)</description>
@ -582,7 +578,6 @@
<arg>--inputPath</arg><arg>${workingDir}/join_entities</arg>
<arg>--outputPath</arg><arg>${workingDir}/xml</arg>
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
<arg>--otherDsTypeId</arg><arg>${otherDsTypeId}</arg>
</spark>
<ok to="should_index"/>
<error to="Kill"/>

View File

@ -4,7 +4,6 @@ package eu.dnetlib.dhp.oa.provision;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import java.io.IOException;
import java.util.List;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
@ -16,11 +15,9 @@ import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.Lists;
import eu.dnetlib.dhp.oa.provision.model.JoinedEntity;
import eu.dnetlib.dhp.oa.provision.model.RelatedEntity;
import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper;
import eu.dnetlib.dhp.oa.provision.utils.ContextMapper;
import eu.dnetlib.dhp.oa.provision.utils.StreamingInputDocumentFactory;
@ -49,7 +46,7 @@ public class IndexRecordTransformerTest {
/**
 * Reads the {@code record.xml} resource located next to this test class and passes its content to
 * {@code testRecordTransformation}.
 */
@Test
public void testPreBuiltRecordTransformation() throws IOException, TransformerException {
String record = IOUtils.toString(getClass().getResourceAsStream("record.xml"));
final String record = IOUtils.toString(getClass().getResourceAsStream("record.xml"));
testRecordTransformation(record);
}
@ -57,14 +54,14 @@ public class IndexRecordTransformerTest {
@Test
public void testPublicationRecordTransformation() throws IOException, TransformerException {
XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation,
XmlRecordFactoryTest.otherDsTypeId);
final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false,
XmlConverterJob.schemaLocation);
Publication p = load("publication.json", Publication.class);
Project pj = load("project.json", Project.class);
Relation rel = load("relToValidatedProject.json", Relation.class);
final Publication p = load("publication.json", Publication.class);
final Project pj = load("project.json", Project.class);
final Relation rel = load("relToValidatedProject.json", Relation.class);
JoinedEntity je = new JoinedEntity<>(p);
final JoinedEntity je = new JoinedEntity<>(p);
je
.setLinks(
Lists
@ -72,24 +69,25 @@ public class IndexRecordTransformerTest {
new RelatedEntityWrapper(rel,
CreateRelatedEntitiesJob_phase1.asRelatedEntity(pj, Project.class))));
String record = xmlRecordFactory.build(je);
final String record = xmlRecordFactory.build(je);
assertNotNull(record);
testRecordTransformation(record);
}
private void testRecordTransformation(String record) throws IOException, TransformerException {
String fields = IOUtils.toString(getClass().getResourceAsStream("fields.xml"));
String xslt = IOUtils.toString(getClass().getResourceAsStream("layoutToRecordTransformer.xsl"));
private void testRecordTransformation(final String record) throws IOException, TransformerException {
final String fields = IOUtils.toString(getClass().getResourceAsStream("fields.xml"));
final String xslt = IOUtils.toString(getClass().getResourceAsStream("layoutToRecordTransformer.xsl"));
String transformer = XmlIndexingJob.getLayoutTransformer("DMF", fields, xslt);
final String transformer = XmlIndexingJob.getLayoutTransformer("DMF", fields, xslt);
Transformer tr = SaxonTransformerFactory.newInstance(transformer);
final Transformer tr = SaxonTransformerFactory.newInstance(transformer);
String indexRecordXML = XmlIndexingJob.toIndexRecord(tr, record);
final String indexRecordXML = XmlIndexingJob.toIndexRecord(tr, record);
SolrInputDocument solrDoc = new StreamingInputDocumentFactory(VERSION, DSID).parseDocument(indexRecordXML);
final SolrInputDocument solrDoc = new StreamingInputDocumentFactory(VERSION, DSID)
.parseDocument(indexRecordXML);
final String xmlDoc = ClientUtils.toXML(solrDoc);
@ -97,7 +95,7 @@ public class IndexRecordTransformerTest {
System.out.println(xmlDoc);
}
/**
 * Reads a resource located next to this test class and deserializes it with
 * {@code XmlRecordFactoryTest.OBJECT_MAPPER}.
 *
 * @param fileName name of the resource, resolved through {@code getClass().getResourceAsStream}
 * @param clazz target type of the deserialization
 * @param <T> type of the returned object
 * @return the content of the resource mapped onto an instance of {@code clazz}
 * @throws IOException declared by the method; presumably raised when the resource cannot be read
 *         or parsed
 */
private <T> T load(String fileName, Class<T> clazz) throws IOException {
private <T> T load(final String fileName, final Class<T> clazz) throws IOException {
return XmlRecordFactoryTest.OBJECT_MAPPER
.readValue(IOUtils.toString(getClass().getResourceAsStream(fileName)), clazz);
}

View File

@ -1,7 +1,8 @@
package eu.dnetlib.dhp.oa.provision;
import static org.junit.jupiter.api.Assertions.*;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import java.io.IOException;
import java.io.StringReader;
@ -29,27 +30,25 @@ import eu.dnetlib.dhp.schema.oaf.Relation;
public class XmlRecordFactoryTest {
public static final String otherDsTypeId = "scholarcomminfra,infospace,pubsrepository::mock,entityregistry,entityregistry::projects,entityregistry::repositories,websource";
public static ObjectMapper OBJECT_MAPPER = new ObjectMapper()
.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
@Test
public void testXMLRecordFactory() throws IOException, DocumentException {
ContextMapper contextMapper = new ContextMapper();
final ContextMapper contextMapper = new ContextMapper();
XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation,
otherDsTypeId);
final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false,
XmlConverterJob.schemaLocation);
Publication p = OBJECT_MAPPER
final Publication p = OBJECT_MAPPER
.readValue(IOUtils.toString(getClass().getResourceAsStream("publication.json")), Publication.class);
String xml = xmlRecordFactory.build(new JoinedEntity<>(p));
final String xml = xmlRecordFactory.build(new JoinedEntity<>(p));
assertNotNull(xml);
Document doc = new SAXReader().read(new StringReader(xml));
final Document doc = new SAXReader().read(new StringReader(xml));
assertNotNull(doc);
@ -72,30 +71,29 @@ public class XmlRecordFactoryTest {
@Test
public void testXMLRecordFactoryWithValidatedProject() throws IOException, DocumentException {
ContextMapper contextMapper = new ContextMapper();
final ContextMapper contextMapper = new ContextMapper();
XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation,
otherDsTypeId);
final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false,
XmlConverterJob.schemaLocation);
Publication p = OBJECT_MAPPER
final Publication p = OBJECT_MAPPER
.readValue(IOUtils.toString(getClass().getResourceAsStream("publication.json")), Publication.class);
Project pj = OBJECT_MAPPER
final Project pj = OBJECT_MAPPER
.readValue(IOUtils.toString(getClass().getResourceAsStream("project.json")), Project.class);
Relation rel = OBJECT_MAPPER
.readValue(
(IOUtils.toString(getClass().getResourceAsStream("relToValidatedProject.json"))), Relation.class);
RelatedEntity relatedProject = CreateRelatedEntitiesJob_phase1.asRelatedEntity(pj, Project.class);
List<RelatedEntityWrapper> links = Lists.newArrayList();
RelatedEntityWrapper rew = new RelatedEntityWrapper(rel, relatedProject);
final Relation rel = OBJECT_MAPPER
.readValue(IOUtils.toString(getClass().getResourceAsStream("relToValidatedProject.json")), Relation.class);
final RelatedEntity relatedProject = CreateRelatedEntitiesJob_phase1.asRelatedEntity(pj, Project.class);
final List<RelatedEntityWrapper> links = Lists.newArrayList();
final RelatedEntityWrapper rew = new RelatedEntityWrapper(rel, relatedProject);
links.add(rew);
JoinedEntity je = new JoinedEntity<>(p);
final JoinedEntity je = new JoinedEntity<>(p);
je.setLinks(links);
String xml = xmlRecordFactory.build(je);
final String xml = xmlRecordFactory.build(je);
assertNotNull(xml);
Document doc = new SAXReader().read(new StringReader(xml));
final Document doc = new SAXReader().read(new StringReader(xml));
assertNotNull(doc);
System.out.println(doc.asXML());
Assertions.assertEquals("2021-01-01", doc.valueOf("//validated/@date"));
@ -104,29 +102,29 @@ public class XmlRecordFactoryTest {
@Test
public void testXMLRecordFactoryWithNonValidatedProject() throws IOException, DocumentException {
ContextMapper contextMapper = new ContextMapper();
final ContextMapper contextMapper = new ContextMapper();
XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, XmlConverterJob.schemaLocation,
otherDsTypeId);
final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false,
XmlConverterJob.schemaLocation);
Publication p = OBJECT_MAPPER
final Publication p = OBJECT_MAPPER
.readValue(IOUtils.toString(getClass().getResourceAsStream("publication.json")), Publication.class);
Project pj = OBJECT_MAPPER
final Project pj = OBJECT_MAPPER
.readValue(IOUtils.toString(getClass().getResourceAsStream("project.json")), Project.class);
Relation rel = OBJECT_MAPPER
.readValue((IOUtils.toString(getClass().getResourceAsStream("relToProject.json"))), Relation.class);
RelatedEntity relatedProject = CreateRelatedEntitiesJob_phase1.asRelatedEntity(pj, Project.class);
List<RelatedEntityWrapper> links = Lists.newArrayList();
RelatedEntityWrapper rew = new RelatedEntityWrapper(rel, relatedProject);
final Relation rel = OBJECT_MAPPER
.readValue(IOUtils.toString(getClass().getResourceAsStream("relToProject.json")), Relation.class);
final RelatedEntity relatedProject = CreateRelatedEntitiesJob_phase1.asRelatedEntity(pj, Project.class);
final List<RelatedEntityWrapper> links = Lists.newArrayList();
final RelatedEntityWrapper rew = new RelatedEntityWrapper(rel, relatedProject);
links.add(rew);
JoinedEntity je = new JoinedEntity<>(p);
final JoinedEntity je = new JoinedEntity<>(p);
je.setLinks(links);
String xml = xmlRecordFactory.build(je);
final String xml = xmlRecordFactory.build(je);
assertNotNull(xml);
Document doc = new SAXReader().read(new StringReader(xml));
final Document doc = new SAXReader().read(new StringReader(xml));
assertNotNull(doc);
System.out.println(doc.asXML());
assertEquals("", doc.valueOf("//rel/validated"));

View File

@ -736,7 +736,7 @@
<mockito-core.version>3.3.3</mockito-core.version>
<mongodb.driver.version>3.4.2</mongodb.driver.version>
<vtd.version>[2.12,3.0)</vtd.version>
<dhp-schemas.version>[2.7.14]</dhp-schemas.version>
<dhp-schemas.version>[2.7.15-SNAPSHOT]</dhp-schemas.version>
<dnet-actionmanager-api.version>[4.0.3]</dnet-actionmanager-api.version>
<dnet-actionmanager-common.version>[6.0.5]</dnet-actionmanager-common.version>
<dnet-openaire-broker-common.version>[3.1.6]</dnet-openaire-broker-common.version>