2022-11-23 15:32:36 +01:00
|
|
|
package eu.dnetlib.is.importer;
|
2022-11-17 11:19:07 +01:00
|
|
|
|
|
|
|
import java.io.StringReader;
|
|
|
|
import java.util.Date;
|
|
|
|
|
|
|
|
import javax.transaction.Transactional;
|
|
|
|
|
|
|
|
import org.apache.commons.lang3.StringUtils;
|
|
|
|
import org.dom4j.Document;
|
2022-11-24 10:19:36 +01:00
|
|
|
import org.dom4j.DocumentException;
|
2022-11-17 11:19:07 +01:00
|
|
|
import org.dom4j.Node;
|
|
|
|
import org.dom4j.io.SAXReader;
|
|
|
|
import org.springframework.beans.factory.annotation.Autowired;
|
2022-11-28 11:11:13 +01:00
|
|
|
import org.springframework.stereotype.Service;
|
2022-11-17 11:19:07 +01:00
|
|
|
|
|
|
|
import eu.dnetlib.is.resource.model.SimpleResource;
|
|
|
|
import eu.dnetlib.is.resource.repository.SimpleResourceRepository;
|
2022-11-24 10:19:36 +01:00
|
|
|
import eu.dnetlib.is.util.InformationServiceException;
|
2022-12-02 14:14:50 +01:00
|
|
|
import eu.dnetlib.is.util.XmlIndenter;
|
2022-11-17 11:19:07 +01:00
|
|
|
import eu.dnetlib.is.vocabulary.model.Synonym;
|
|
|
|
import eu.dnetlib.is.vocabulary.model.Vocabulary;
|
|
|
|
import eu.dnetlib.is.vocabulary.model.VocabularyTerm;
|
|
|
|
import eu.dnetlib.is.vocabulary.repository.VocabularyRepository;
|
|
|
|
import eu.dnetlib.is.vocabulary.repository.VocabularyTermRepository;
|
|
|
|
|
2022-11-28 11:11:13 +01:00
|
|
|
@Service
|
2022-11-17 11:19:07 +01:00
|
|
|
public class OldProfilesImporter {
|
|
|
|
|
|
|
|
@Autowired
|
|
|
|
private SimpleResourceRepository simpleResourceRepository;
|
|
|
|
|
|
|
|
@Autowired
|
|
|
|
private VocabularyRepository vocabularyRepository;
|
|
|
|
|
|
|
|
@Autowired
|
|
|
|
private VocabularyTermRepository vocabularyTermRepository;
|
|
|
|
|
|
|
|
@Transactional
|
2022-12-02 13:29:53 +01:00
|
|
|
public String importSimpleResource(final String xml) throws InformationServiceException {
|
2022-11-17 11:19:07 +01:00
|
|
|
final SAXReader reader = new SAXReader();
|
|
|
|
|
2022-11-24 10:19:36 +01:00
|
|
|
try {
|
|
|
|
final Document doc = reader.read(new StringReader(xml));
|
|
|
|
|
|
|
|
final String id = StringUtils.substringBefore(doc.valueOf("//RESOURCE_IDENTIFIER/@value"), "_");
|
|
|
|
final Date now = new Date();
|
|
|
|
|
|
|
|
final SimpleResource res = new SimpleResource();
|
|
|
|
res.setId(id);
|
|
|
|
res.setCreationDate(now);
|
|
|
|
res.setModificationDate(now);
|
|
|
|
|
|
|
|
String resContent;
|
|
|
|
switch (doc.valueOf("//RESOURCE_TYPE/@value")) {
|
|
|
|
case "CleanerDSResourceType":
|
|
|
|
res.setType("cleaning_rule");
|
|
|
|
res.setName(doc.valueOf("//CLEANER_NAME"));
|
|
|
|
res.setDescription(doc.valueOf("//CLEANER_DESCRIPTION"));
|
2022-12-02 14:14:50 +01:00
|
|
|
resContent = XmlIndenter.indent(doc.selectSingleNode("//CLEANER_RULES"));
|
2022-11-24 10:19:36 +01:00
|
|
|
break;
|
|
|
|
case "TransformationRuleDSResourceType":
|
|
|
|
res.setName(doc.valueOf("//SCRIPT/TITLE"));
|
|
|
|
res.setDescription("");
|
|
|
|
if (doc.selectNodes("//*[local-name() = 'stylesheet']").size() > 0) {
|
|
|
|
res.setType("transformation_rule_xslt");
|
2022-12-02 14:14:50 +01:00
|
|
|
resContent = XmlIndenter.indent(doc.selectSingleNode("//*[local-name() = 'stylesheet']"));
|
2022-11-24 10:19:36 +01:00
|
|
|
} else {
|
|
|
|
final String code = doc.valueOf("//SCRIPT/CODE").trim();
|
|
|
|
try {
|
|
|
|
final Document xsltDoc = reader.read(new StringReader(code));
|
|
|
|
res.setType("transformation_rule_xslt");
|
2022-12-02 14:14:50 +01:00
|
|
|
resContent = XmlIndenter.indent(xsltDoc);
|
2022-11-24 10:19:36 +01:00
|
|
|
} catch (final DocumentException e) {
|
|
|
|
res.setType("transformation_rule_legacy");
|
|
|
|
resContent = code;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case "HadoopJobConfigurationDSResourceType":
|
|
|
|
res.setType("hadoop_job_configuration");
|
|
|
|
res.setName(doc.valueOf("//HADOOP_JOB/@name"));
|
|
|
|
res.setDescription(doc.valueOf("//HADOOP_JOB/DESCRIPTION"));
|
2022-12-02 14:14:50 +01:00
|
|
|
resContent = XmlIndenter.indent(doc.selectSingleNode("//HADOOP_JOB"));
|
2022-11-24 10:19:36 +01:00
|
|
|
break;
|
2022-12-02 13:29:53 +01:00
|
|
|
case "DedupConfigurationDSResourceType":
|
|
|
|
res.setType("dedup_configuration");
|
|
|
|
res.setName(doc.valueOf("//DESCRIPTION"));
|
|
|
|
res.setDescription(doc.valueOf("//DESCRIPTION"));
|
|
|
|
resContent = doc.valueOf("//DEDUPLICATION");
|
|
|
|
break;
|
2022-11-24 10:19:36 +01:00
|
|
|
default:
|
|
|
|
throw new InformationServiceException("Invalid resource type: " + doc.valueOf("//RESOURCE_TYPE/@value"));
|
|
|
|
}
|
|
|
|
|
|
|
|
simpleResourceRepository.save(res);
|
2022-12-02 13:29:53 +01:00
|
|
|
simpleResourceRepository.setContentById(id, resContent.trim());
|
2022-11-24 10:19:36 +01:00
|
|
|
|
2022-12-02 13:29:53 +01:00
|
|
|
return res.getId();
|
2022-11-24 10:19:36 +01:00
|
|
|
} catch (final Exception e) {
|
|
|
|
throw new InformationServiceException("Error parsing file", e);
|
|
|
|
}
|
2022-11-17 11:19:07 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
@Transactional
|
|
|
|
public Vocabulary importVocabulary(final String xml) throws Exception {
|
|
|
|
final SAXReader reader = new SAXReader();
|
|
|
|
final Document doc = reader.read(new StringReader(xml));
|
|
|
|
|
|
|
|
final Vocabulary voc = new Vocabulary();
|
|
|
|
final String vocId = doc.valueOf("//VOCABULARY_NAME/@code");
|
|
|
|
final String vocName = doc.valueOf("//VOCABULARY_NAME");
|
|
|
|
final String vocDesc = doc.valueOf("//VOCABULARY_DESCRIPTION");
|
|
|
|
|
|
|
|
voc.setId(vocId);
|
|
|
|
voc.setName(vocName);
|
|
|
|
voc.setDescription(vocDesc);
|
|
|
|
|
|
|
|
vocabularyRepository.save(voc);
|
|
|
|
|
|
|
|
for (final Node n : doc.selectNodes("//TERM")) {
|
|
|
|
final VocabularyTerm term = new VocabularyTerm();
|
|
|
|
term.setVocabulary(vocId);
|
|
|
|
term.setCode(n.valueOf("@code"));
|
|
|
|
term.setName(n.valueOf("@english_name"));
|
|
|
|
term.setEncoding(n.valueOf("@encoding"));
|
|
|
|
term.setSynonyms(n.selectNodes(".//SYNONYM")
|
|
|
|
.stream()
|
|
|
|
.map(ns -> new Synonym(ns.valueOf("@term"), ns.valueOf("@encoding")))
|
|
|
|
.sorted()
|
|
|
|
.distinct()
|
|
|
|
.toArray(Synonym[]::new));
|
|
|
|
|
|
|
|
vocabularyTermRepository.save(term);
|
|
|
|
}
|
|
|
|
|
|
|
|
return voc;
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|