dnet-applications/apps/dnet-is-application/src/main/java/eu/dnetlib/is/importer/OldProfilesImporter.java

144 lines
4.6 KiB
Java
Raw Normal View History

2022-11-23 15:32:36 +01:00
package eu.dnetlib.is.importer;
2022-11-17 11:19:07 +01:00
import java.util.Date;
import javax.transaction.Transactional;
import org.apache.commons.lang3.StringUtils;
import org.dom4j.Document;
2022-11-24 10:19:36 +01:00
import org.dom4j.DocumentException;
2022-12-07 11:42:40 +01:00
import org.dom4j.DocumentHelper;
2022-11-17 11:19:07 +01:00
import org.dom4j.Node;
import org.springframework.beans.factory.annotation.Autowired;
2022-11-28 11:11:13 +01:00
import org.springframework.stereotype.Service;
2022-11-17 11:19:07 +01:00
import eu.dnetlib.is.resource.model.SimpleResource;
import eu.dnetlib.is.resource.repository.SimpleResourceRepository;
2022-11-24 10:19:36 +01:00
import eu.dnetlib.is.util.InformationServiceException;
2022-12-06 10:16:49 +01:00
import eu.dnetlib.is.util.ResourceValidator;
2022-12-02 14:14:50 +01:00
import eu.dnetlib.is.util.XmlIndenter;
2022-11-17 11:19:07 +01:00
import eu.dnetlib.is.vocabulary.model.Synonym;
import eu.dnetlib.is.vocabulary.model.Vocabulary;
import eu.dnetlib.is.vocabulary.model.VocabularyTerm;
import eu.dnetlib.is.vocabulary.repository.VocabularyRepository;
import eu.dnetlib.is.vocabulary.repository.VocabularyTermRepository;
2022-11-28 11:11:13 +01:00
@Service
2022-11-17 11:19:07 +01:00
public class OldProfilesImporter {
@Autowired
private SimpleResourceRepository simpleResourceRepository;
@Autowired
private VocabularyRepository vocabularyRepository;
@Autowired
private VocabularyTermRepository vocabularyTermRepository;
2022-12-06 10:16:49 +01:00
@Autowired
private ResourceValidator resourceValidator;
2022-11-17 11:19:07 +01:00
@Transactional
2022-12-02 13:29:53 +01:00
public String importSimpleResource(final String xml) throws InformationServiceException {
2022-11-17 11:19:07 +01:00
2022-11-24 10:19:36 +01:00
try {
2022-12-07 11:42:40 +01:00
final Document doc = DocumentHelper.parseText(xml);
2022-11-24 10:19:36 +01:00
final String id = StringUtils.substringBefore(doc.valueOf("//RESOURCE_IDENTIFIER/@value"), "_");
final Date now = new Date();
final SimpleResource res = new SimpleResource();
res.setId(id);
res.setCreationDate(now);
res.setModificationDate(now);
String resContent;
switch (doc.valueOf("//RESOURCE_TYPE/@value")) {
case "CleanerDSResourceType":
res.setType("cleaning_rule");
res.setName(doc.valueOf("//CLEANER_NAME"));
res.setDescription(doc.valueOf("//CLEANER_DESCRIPTION"));
2022-12-02 14:14:50 +01:00
resContent = XmlIndenter.indent(doc.selectSingleNode("//CLEANER_RULES"));
2022-11-24 10:19:36 +01:00
break;
case "TransformationRuleDSResourceType":
res.setName(doc.valueOf("//SCRIPT/TITLE"));
res.setDescription("");
if (doc.selectNodes("//*[local-name() = 'stylesheet']").size() > 0) {
res.setType("transformation_rule_xslt");
2022-12-02 14:14:50 +01:00
resContent = XmlIndenter.indent(doc.selectSingleNode("//*[local-name() = 'stylesheet']"));
2022-11-24 10:19:36 +01:00
} else {
final String code = doc.valueOf("//SCRIPT/CODE").trim();
2022-12-07 11:42:40 +01:00
2022-11-24 10:19:36 +01:00
try {
2022-12-07 11:42:40 +01:00
final Document xsltDoc = DocumentHelper.parseText(code);
2022-11-24 10:19:36 +01:00
res.setType("transformation_rule_xslt");
2022-12-02 14:14:50 +01:00
resContent = XmlIndenter.indent(xsltDoc);
2022-11-24 10:19:36 +01:00
} catch (final DocumentException e) {
res.setType("transformation_rule_legacy");
resContent = code;
}
}
break;
case "HadoopJobConfigurationDSResourceType":
res.setType("hadoop_job_configuration");
res.setName(doc.valueOf("//HADOOP_JOB/@name"));
res.setDescription(doc.valueOf("//HADOOP_JOB/DESCRIPTION"));
2022-12-02 14:14:50 +01:00
resContent = XmlIndenter.indent(doc.selectSingleNode("//HADOOP_JOB"));
2022-11-24 10:19:36 +01:00
break;
2022-12-02 13:29:53 +01:00
case "DedupConfigurationDSResourceType":
res.setType("dedup_configuration");
res.setName(doc.valueOf("//DESCRIPTION"));
res.setDescription(doc.valueOf("//DESCRIPTION"));
resContent = doc.valueOf("//DEDUPLICATION");
break;
2022-11-24 10:19:36 +01:00
default:
throw new InformationServiceException("Invalid resource type: " + doc.valueOf("//RESOURCE_TYPE/@value"));
}
2022-12-06 10:16:49 +01:00
resourceValidator.validate(res.getType(), resContent.trim());
2022-11-24 10:19:36 +01:00
simpleResourceRepository.save(res);
2022-12-02 13:29:53 +01:00
simpleResourceRepository.setContentById(id, resContent.trim());
2022-11-24 10:19:36 +01:00
2022-12-02 13:29:53 +01:00
return res.getId();
2022-11-24 10:19:36 +01:00
} catch (final Exception e) {
throw new InformationServiceException("Error parsing file", e);
}
2022-11-17 11:19:07 +01:00
}
@Transactional
public Vocabulary importVocabulary(final String xml) throws Exception {
2022-12-07 11:42:40 +01:00
final Document doc = DocumentHelper.parseText(xml);
2022-11-17 11:19:07 +01:00
final Vocabulary voc = new Vocabulary();
final String vocId = doc.valueOf("//VOCABULARY_NAME/@code");
final String vocName = doc.valueOf("//VOCABULARY_NAME");
final String vocDesc = doc.valueOf("//VOCABULARY_DESCRIPTION");
voc.setId(vocId);
voc.setName(vocName);
voc.setDescription(vocDesc);
vocabularyRepository.save(voc);
for (final Node n : doc.selectNodes("//TERM")) {
final VocabularyTerm term = new VocabularyTerm();
term.setVocabulary(vocId);
term.setCode(n.valueOf("@code"));
term.setName(n.valueOf("@english_name"));
term.setEncoding(n.valueOf("@encoding"));
term.setSynonyms(n.selectNodes(".//SYNONYM")
.stream()
.map(ns -> new Synonym(ns.valueOf("@term"), ns.valueOf("@encoding")))
.sorted()
.distinct()
.toArray(Synonym[]::new));
vocabularyTermRepository.save(term);
}
return voc;
}
}