/*
 * dnet-applications/apps/dnet-is-application/src/main/java/eu/dnetlib/is/importer/OldProfilesImporter.java
 *
 * Repository viewer metadata: 116 lines, 3.6 KiB, Java
 * 2022-11-23 15:32:36 +01:00
 */
package eu.dnetlib.is.importer;
// 2022-11-17 11:19:07 +01:00
import java.io.StringReader;
import java.util.Date;
import javax.transaction.Transactional;
import org.apache.commons.lang3.StringUtils;
import org.dom4j.Document;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;
import org.springframework.beans.factory.annotation.Autowired;
// 2022-11-23 11:17:50 +01:00
import org.springframework.http.MediaType;
// 2022-11-17 11:19:07 +01:00
import org.springframework.stereotype.Component;
import eu.dnetlib.is.resource.model.SimpleResource;
import eu.dnetlib.is.resource.repository.SimpleResourceRepository;
import eu.dnetlib.is.vocabulary.model.Synonym;
import eu.dnetlib.is.vocabulary.model.Vocabulary;
import eu.dnetlib.is.vocabulary.model.VocabularyTerm;
import eu.dnetlib.is.vocabulary.repository.VocabularyRepository;
import eu.dnetlib.is.vocabulary.repository.VocabularyTermRepository;
@Component
public class OldProfilesImporter {
@Autowired
private SimpleResourceRepository simpleResourceRepository;
@Autowired
private VocabularyRepository vocabularyRepository;
@Autowired
private VocabularyTermRepository vocabularyTermRepository;
@Transactional
public SimpleResource importSimpleResource(final String xml) throws Exception {
final SAXReader reader = new SAXReader();
final Document doc = reader.read(new StringReader(xml));
final String id = StringUtils.substringBefore(doc.valueOf("//RESOURCE_IDENTIFIER/@value"), "_");
final Date now = new Date();
final SimpleResource res = new SimpleResource();
res.setId(id);
res.setCreationDate(now);
res.setModificationDate(now);
2022-11-23 11:17:50 +01:00
res.setContentType(MediaType.APPLICATION_XML_VALUE);
2022-11-17 11:19:07 +01:00
String resContent;
switch (doc.valueOf("//RESOURCE_TYPE/@value")) {
case "CleanerDSResourceType":
res.setType("cleaning_rule");
res.setName(doc.valueOf("//CLEANER_NAME"));
res.setDescription(doc.valueOf("//CLEANER_DESCRIPTION"));
resContent = doc.selectSingleNode("//CLEANER_RULES").asXML();
break;
case "TransformationRuleDSResourceType":
res.setType("transformation_rule");
res.setName(doc.valueOf("//SCRIPT/TITLE"));
res.setDescription("");
resContent = doc.selectSingleNode("//SCRIPT/CODE").asXML();
break;
case "HadoopJobConfigurationDSResourceType":
res.setType("hadoop_job_configuration");
res.setName(doc.valueOf("//HADOOP_JOB/@name"));
res.setDescription(doc.valueOf("//HADOOP_JOB/DESCRIPTION"));
resContent = doc.selectSingleNode("//HADOOP_JOB").asXML();
break;
default:
throw new Exception("Invalid resource type: " + doc.valueOf("//RESOURCE_TYPE/@value"));
}
simpleResourceRepository.save(res);
simpleResourceRepository.setContentById(id, resContent);
return res;
}
@Transactional
public Vocabulary importVocabulary(final String xml) throws Exception {
final SAXReader reader = new SAXReader();
final Document doc = reader.read(new StringReader(xml));
final Vocabulary voc = new Vocabulary();
final String vocId = doc.valueOf("//VOCABULARY_NAME/@code");
final String vocName = doc.valueOf("//VOCABULARY_NAME");
final String vocDesc = doc.valueOf("//VOCABULARY_DESCRIPTION");
voc.setId(vocId);
voc.setName(vocName);
voc.setDescription(vocDesc);
vocabularyRepository.save(voc);
for (final Node n : doc.selectNodes("//TERM")) {
final VocabularyTerm term = new VocabularyTerm();
term.setVocabulary(vocId);
term.setCode(n.valueOf("@code"));
term.setName(n.valueOf("@english_name"));
term.setEncoding(n.valueOf("@encoding"));
term.setSynonyms(n.selectNodes(".//SYNONYM")
.stream()
.map(ns -> new Synonym(ns.valueOf("@term"), ns.valueOf("@encoding")))
.sorted()
.distinct()
.toArray(Synonym[]::new));
vocabularyTermRepository.save(term);
}
return voc;
}
}