dnet-applications/apps/dnet-is-application/src/main/java/eu/dnetlib/is/importer/OldProfilesImporter.java

144 lines
4.6 KiB
Java

package eu.dnetlib.is.importer;
import java.util.Date;
import javax.transaction.Transactional;
import org.apache.commons.lang3.StringUtils;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.DocumentHelper;
import org.dom4j.Node;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import eu.dnetlib.is.resource.model.SimpleResource;
import eu.dnetlib.is.resource.repository.SimpleResourceRepository;
import eu.dnetlib.is.util.InformationServiceException;
import eu.dnetlib.is.util.ResourceValidator;
import eu.dnetlib.is.util.XmlIndenter;
import eu.dnetlib.is.vocabulary.model.Synonym;
import eu.dnetlib.is.vocabulary.model.Vocabulary;
import eu.dnetlib.is.vocabulary.model.VocabularyTerm;
import eu.dnetlib.is.vocabulary.repository.VocabularyRepository;
import eu.dnetlib.is.vocabulary.repository.VocabularyTermRepository;
/**
 * Imports resources written in the old XML profile format and stores them through the
 * resource and vocabulary repositories.
 *
 * NOTE(review): both import methods use {@code javax.transaction.Transactional} with its default
 * settings; confirm that the default rollback rules are the intended ones for the checked
 * exceptions declared by these methods.
 */
@Service
public class OldProfilesImporter {

    @Autowired
    private SimpleResourceRepository simpleResourceRepository;

    @Autowired
    private VocabularyRepository vocabularyRepository;

    @Autowired
    private VocabularyTermRepository vocabularyTermRepository;

    @Autowired
    private ResourceValidator resourceValidator;

    /**
     * Converts an old XML profile into a {@link SimpleResource}, validates its content and saves it.
     *
     * <p>The resource id is the part of {@code //RESOURCE_IDENTIFIER/@value} that precedes the first
     * underscore. The value of {@code //RESOURCE_TYPE/@value} selects how type, name, description and
     * content are extracted:
     * <ul>
     * <li>{@code CleanerDSResourceType}: type {@code cleaning_rule}, content is the indented
     * {@code CLEANER_RULES} node;</li>
     * <li>{@code TransformationRuleDSResourceType}: if the profile contains an element whose local
     * name is {@code stylesheet}, that element is the content (type {@code transformation_rule_xslt});
     * otherwise the trimmed text of {@code //SCRIPT/CODE} is used, typed as
     * {@code transformation_rule_xslt} when it parses as XML and as
     * {@code transformation_rule_legacy} when it does not;</li>
     * <li>{@code HadoopJobConfigurationDSResourceType}: type {@code hadoop_job_configuration}, content
     * is the indented {@code HADOOP_JOB} node;</li>
     * <li>{@code DedupConfigurationDSResourceType}: type {@code dedup_configuration}, content is the
     * text value of {@code //DEDUPLICATION}; name and description are both taken from
     * {@code //DESCRIPTION}.</li>
     * </ul>
     *
     * @param xml the old profile, as an XML string
     * @return the id of the saved resource
     * @throws InformationServiceException if the identifier is missing, if the resource type is not
     *         one of those listed above, or if any other failure occurs while parsing, validating or
     *         saving (in which case the original exception is the cause)
     */
    @Transactional
    public String importSimpleResource(final String xml) throws InformationServiceException {
        try {
            final Document doc = DocumentHelper.parseText(xml);

            final String id = StringUtils.substringBefore(doc.valueOf("//RESOURCE_IDENTIFIER/@value"), "_");
            if (StringUtils.isBlank(id)) {
                // Without this guard a profile lacking an identifier would be saved under an empty id.
                throw new InformationServiceException("Missing resource identifier");
            }

            // Evaluated once: used both to dispatch and to report an unsupported type.
            final String profileType = doc.valueOf("//RESOURCE_TYPE/@value");

            final Date now = new Date();

            final SimpleResource res = new SimpleResource();
            res.setId(id);
            res.setCreationDate(now);
            res.setModificationDate(now);

            String resContent;

            switch (profileType) {
            case "CleanerDSResourceType":
                res.setType("cleaning_rule");
                res.setName(doc.valueOf("//CLEANER_NAME"));
                res.setDescription(doc.valueOf("//CLEANER_DESCRIPTION"));
                resContent = XmlIndenter.indent(doc.selectSingleNode("//CLEANER_RULES"));
                break;
            case "TransformationRuleDSResourceType":
                res.setName(doc.valueOf("//SCRIPT/TITLE"));
                res.setDescription("");
                if (doc.selectNodes("//*[local-name() = 'stylesheet']").size() > 0) {
                    // The stylesheet is embedded as XML inside the profile.
                    res.setType("transformation_rule_xslt");
                    resContent = XmlIndenter.indent(doc.selectSingleNode("//*[local-name() = 'stylesheet']"));
                } else {
                    // The rule is stored as text: it is an XSLT only if that text is itself parsable XML.
                    final String code = doc.valueOf("//SCRIPT/CODE").trim();
                    try {
                        final Document xsltDoc = DocumentHelper.parseText(code);
                        res.setType("transformation_rule_xslt");
                        resContent = XmlIndenter.indent(xsltDoc);
                    } catch (final DocumentException e) {
                        // Not XML: keep the code verbatim as a legacy rule.
                        res.setType("transformation_rule_legacy");
                        resContent = code;
                    }
                }
                break;
            case "HadoopJobConfigurationDSResourceType":
                res.setType("hadoop_job_configuration");
                res.setName(doc.valueOf("//HADOOP_JOB/@name"));
                res.setDescription(doc.valueOf("//HADOOP_JOB/DESCRIPTION"));
                resContent = XmlIndenter.indent(doc.selectSingleNode("//HADOOP_JOB"));
                break;
            case "DedupConfigurationDSResourceType":
                res.setType("dedup_configuration");
                res.setName(doc.valueOf("//DESCRIPTION"));
                res.setDescription(doc.valueOf("//DESCRIPTION"));
                resContent = doc.valueOf("//DEDUPLICATION");
                break;
            default:
                throw new InformationServiceException("Invalid resource type: " + profileType);
            }

            final String content = resContent.trim();

            // Validate before touching the repository so that invalid content is never saved.
            resourceValidator.validate(res.getType(), content);

            simpleResourceRepository.save(res);
            simpleResourceRepository.setContentById(id, content);

            return res.getId();
        } catch (final InformationServiceException e) {
            // Already carries a specific message (e.g. invalid type): do not hide it behind
            // the generic "Error parsing file" wrapper below.
            throw e;
        } catch (final Exception e) {
            throw new InformationServiceException("Error parsing file", e);
        }
    }

    /**
     * Converts an old XML vocabulary profile into a {@link Vocabulary} and its terms, and saves them.
     *
     * <p>The vocabulary id is read from {@code //VOCABULARY_NAME/@code}, its name from the text of
     * {@code //VOCABULARY_NAME} and its description from {@code //VOCABULARY_DESCRIPTION}. One
     * {@link VocabularyTerm} is saved for every {@code TERM} element, with its {@code SYNONYM}
     * descendants sorted and de-duplicated.
     *
     * @param xml the old vocabulary profile, as an XML string
     * @return the saved vocabulary (the terms are saved separately and are not attached to it here)
     * @throws Exception if the XML cannot be parsed or a repository operation fails
     */
    @Transactional
    public Vocabulary importVocabulary(final String xml) throws Exception {
        final Document doc = DocumentHelper.parseText(xml);

        final Vocabulary voc = new Vocabulary();
        final String vocId = doc.valueOf("//VOCABULARY_NAME/@code");
        final String vocName = doc.valueOf("//VOCABULARY_NAME");
        final String vocDesc = doc.valueOf("//VOCABULARY_DESCRIPTION");

        voc.setId(vocId);
        voc.setName(vocName);
        voc.setDescription(vocDesc);

        vocabularyRepository.save(voc);

        for (final Node n : doc.selectNodes("//TERM")) {
            final VocabularyTerm term = new VocabularyTerm();
            term.setVocabulary(vocId);
            term.setCode(n.valueOf("@code"));
            term.setName(n.valueOf("@english_name"));
            term.setEncoding(n.valueOf("@encoding"));
            // sorted() without a comparator relies on natural ordering — presumably Synonym
            // implements Comparable (and equals, for distinct()); verify against its definition.
            term.setSynonyms(n.selectNodes(".//SYNONYM")
                    .stream()
                    .map(ns -> new Synonym(ns.valueOf("@term"), ns.valueOf("@encoding")))
                    .sorted()
                    .distinct()
                    .toArray(Synonym[]::new));

            vocabularyTermRepository.save(term);
        }

        return voc;
    }

}