package eu.dnetlib.is.importer; import java.util.Date; import javax.transaction.Transactional; import org.apache.commons.lang3.StringUtils; import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.DocumentHelper; import org.dom4j.Node; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; import eu.dnetlib.is.resource.model.SimpleResource; import eu.dnetlib.is.resource.repository.SimpleResourceRepository; import eu.dnetlib.is.util.InformationServiceException; import eu.dnetlib.is.util.ResourceValidator; import eu.dnetlib.is.util.XmlIndenter; import eu.dnetlib.is.vocabulary.model.Synonym; import eu.dnetlib.is.vocabulary.model.Vocabulary; import eu.dnetlib.is.vocabulary.model.VocabularyTerm; import eu.dnetlib.is.vocabulary.repository.VocabularyRepository; import eu.dnetlib.is.vocabulary.repository.VocabularyTermRepository; @Service public class OldProfilesImporter { @Autowired private SimpleResourceRepository simpleResourceRepository; @Autowired private VocabularyRepository vocabularyRepository; @Autowired private VocabularyTermRepository vocabularyTermRepository; @Autowired private ResourceValidator resourceValidator; @Transactional public String importSimpleResource(final String xml) throws InformationServiceException { try { final Document doc = DocumentHelper.parseText(xml); final String id = StringUtils.substringBefore(doc.valueOf("//RESOURCE_IDENTIFIER/@value"), "_"); final Date now = new Date(); final SimpleResource res = new SimpleResource(); res.setId(id); res.setCreationDate(now); res.setModificationDate(now); String resContent; switch (doc.valueOf("//RESOURCE_TYPE/@value")) { case "CleanerDSResourceType": res.setType("cleaning_rule"); res.setName(doc.valueOf("//CLEANER_NAME")); res.setDescription(doc.valueOf("//CLEANER_DESCRIPTION")); resContent = XmlIndenter.indent(doc.selectSingleNode("//CLEANER_RULES")); break; case "TransformationRuleDSResourceType": res.setName(doc.valueOf("//SCRIPT/TITLE")); res.setDescription(""); if (doc.selectNodes("//*[local-name() = 'stylesheet']").size() > 0) { res.setType("transformation_rule_xslt"); resContent = XmlIndenter.indent(doc.selectSingleNode("//*[local-name() = 'stylesheet']")); } else { final String code = doc.valueOf("//SCRIPT/CODE").trim(); try { final Document xsltDoc = DocumentHelper.parseText(code); res.setType("transformation_rule_xslt"); resContent = XmlIndenter.indent(xsltDoc); } catch (final DocumentException e) { res.setType("transformation_rule_legacy"); resContent = code; } } break; case "HadoopJobConfigurationDSResourceType": res.setType("hadoop_job_configuration"); res.setName(doc.valueOf("//HADOOP_JOB/@name")); res.setDescription(doc.valueOf("//HADOOP_JOB/DESCRIPTION")); resContent = XmlIndenter.indent(doc.selectSingleNode("//HADOOP_JOB")); break; case "DedupConfigurationDSResourceType": res.setType("dedup_configuration"); res.setName(doc.valueOf("//DESCRIPTION")); res.setDescription(doc.valueOf("//DESCRIPTION")); resContent = doc.valueOf("//DEDUPLICATION"); break; default: throw new InformationServiceException("Invalid resource type: " + doc.valueOf("//RESOURCE_TYPE/@value")); } resourceValidator.validate(res.getType(), resContent.trim()); simpleResourceRepository.save(res); simpleResourceRepository.setContentById(id, resContent.trim()); return res.getId(); } catch (final Exception e) { throw new InformationServiceException("Error parsing file", e); } } @Transactional public Vocabulary importVocabulary(final String xml) throws Exception { final Document doc = DocumentHelper.parseText(xml); final Vocabulary voc = new Vocabulary(); final String vocId = doc.valueOf("//VOCABULARY_NAME/@code"); final String vocName = doc.valueOf("//VOCABULARY_NAME"); final String vocDesc = doc.valueOf("//VOCABULARY_DESCRIPTION"); voc.setId(vocId); voc.setName(vocName); voc.setDescription(vocDesc); vocabularyRepository.save(voc); for (final Node n : doc.selectNodes("//TERM")) { final VocabularyTerm term = new VocabularyTerm(); term.setVocabulary(vocId); term.setCode(n.valueOf("@code")); term.setName(n.valueOf("@english_name")); term.setEncoding(n.valueOf("@encoding")); term.setSynonyms(n.selectNodes(".//SYNONYM") .stream() .map(ns -> new Synonym(ns.valueOf("@term"), ns.valueOf("@encoding"))) .sorted() .distinct() .toArray(Synonym[]::new)); vocabularyTermRepository.save(term); } return voc; } }