imported dnet-data-transformation-service and unibi-data-collective-transformation-common into dnet-core-components; the transformation inspector was left behind for the moment

This commit is contained in:
Claudio Atzori 2019-06-07 17:38:50 +02:00
parent e098900ef4
commit 22d126ffa8
145 changed files with 19612 additions and 0 deletions

View File

@ -23,14 +23,27 @@
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>commons-beanutils</groupId>
<artifactId>commons-beanutils</artifactId>
</dependency>
<dependency>
<groupId>org.json</groupId>
<artifactId>json</artifactId>
</dependency>
<dependency>
<groupId>org.svenson</groupId>
<artifactId>svenson-json</artifactId>
</dependency>
<dependency>
<groupId>com.ximpleware</groupId>
<artifactId>vtd-xml</artifactId>
</dependency>
<dependency>
<groupId>com.sun.xsom</groupId>
<artifactId>xsom</artifactId>
</dependency>
<dependency>
<groupId>com.jcraft</groupId>
<artifactId>jsch</artifactId>

View File

@ -0,0 +1,58 @@
package eu.dnetlib.common.profile;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dom4j.Document;
import org.dom4j.Node;
import eu.dnetlib.common.utils.XMLException;
import eu.dnetlib.common.utils.XMLUtils;
/**
 * Base class for resources that wrap a dom4j {@link Document} and expose
 * XPath-based read and write access to it through {@link XMLUtils}.
 */
public abstract class DnetResource {

    private static final Log log = LogFactory.getLog(DnetResource.class);

    /** The wrapped XML document; {@code null} until supplied via constructor or setter. */
    Document resource;

    /** Creates a resource without a document; call {@link #setResource(Document)} before use. */
    public DnetResource() {
    }

    /**
     * @param resource the document to wrap
     */
    public DnetResource(Document resource) {
        this.resource = resource;
    }

    /**
     * @param resource the document to wrap
     */
    public void setResource(Document resource) {
        this.resource = resource;
    }

    /**
     * @return the wrapped document
     */
    public Document getResource() {
        return this.resource;
    }

    /**
     * Evaluates an XPath expression against the wrapped document.
     *
     * @param xpathExpr the XPath expression to evaluate
     * @return the string value of the expression, or {@code null} if the evaluation
     *         failed with an {@link XMLException} (the failure is logged, not rethrown)
     */
    public String getValue(String xpathExpr) {
        try {
            return XMLUtils.evaluate(resource, xpathExpr);
        } catch (XMLException e) {
            // pass the throwable separately so the stack trace is not lost
            log.error("cannot evaluate xpath expression: " + xpathExpr, e);
            return null;
        }
    }

    /**
     * Sets the text of the single node selected by the given XPath expression.
     * NOTE(review): presumably fails with a NullPointerException when the expression
     * selects no node, because the looked-up node is dereferenced unchecked - confirm.
     *
     * @param xpathExpr the XPath expression selecting the node to modify
     * @param value     the new text of the node
     */
    public void setValue(String xpathExpr, String value) {
        XMLUtils.getNode(resource, xpathExpr).setText(value);
    }

    /**
     * Selects all nodes matching an XPath expression in the wrapped document.
     *
     * @param xpathExpr the XPath expression to evaluate
     * @return the selected nodes, or {@code null} if the selection failed with an
     *         {@link XMLException} (the failure is logged, not rethrown)
     */
    public List<Node> getNodeList(String xpathExpr) {
        try {
            return XMLUtils.getNodes(resource, xpathExpr);
        } catch (XMLException e) {
            // pass the throwable separately so the stack trace is not lost
            log.error("cannot select nodes for xpath expression: " + xpathExpr, e);
            return null;
        }
    }
}

View File

@ -0,0 +1,11 @@
package eu.dnetlib.common.profile;
import java.util.List;
/**
 * XPath-based read access to a resource.
 */
public interface IResource {

    /**
     * @param xpathExpr the XPath expression to evaluate
     * @return the string value obtained for the expression
     */
    String getValue(String xpathExpr);

    /**
     * @param xpathExpr the XPath expression to evaluate
     * @return the nodes obtained for the expression, as an untyped list
     */
    @SuppressWarnings("unchecked")
    List getNodeList(String xpathExpr);
}

View File

@ -0,0 +1,17 @@
package eu.dnetlib.common.profile;
import java.util.List;
/**
 * Data access operations for {@link Resource} objects.
 */
public interface IResourceDao {

    /**
     * @param xquery the query selecting the resources
     * @return the resources obtained for the query
     */
    List<Resource> getResources(String xquery);

    /**
     * @param id the identifier of the requested resource
     * @return the resource obtained for the identifier
     * @throws Exception if the resource cannot be provided
     */
    Resource getResource(String id) throws Exception;

    /**
     * @param query the query selecting a single resource
     * @return the resource obtained for the query
     * @throws Exception if the resource cannot be provided
     */
    Resource getResourceByQuery(String query) throws Exception;

    /**
     * @param id       the identifier of the resource to remove
     * @param resource the resource to remove
     */
    void removeResource(String id, Resource resource);

    /**
     * @param id       the identifier of the resource to update
     * @param resource the resource carrying the new content
     */
    void updateResource(String id, Resource resource);
}

View File

@ -0,0 +1,12 @@
package eu.dnetlib.common.profile;
import java.util.List;
/**
 * Back-end operations on {@link Resource} objects.
 */
public interface IResourceDaoSupport {

    /**
     * @param xquery the XQuery selecting the resources
     * @return the resources obtained for the query
     */
    List<Resource> getResources(String xquery);

    /**
     * @param xquery the XQuery selecting a single resource
     * @return the resource obtained for the query
     * @throws Exception if the resource cannot be provided
     */
    Resource getResourceByXquery(String xquery) throws Exception;

    /**
     * @param id the identifier of the requested resource
     * @return the resource obtained for the identifier
     * @throws Exception if the resource cannot be provided
     */
    Resource getResource(String id) throws Exception;

    /**
     * @param id       the identifier of the resource to update
     * @param resource the resource carrying the new content
     */
    void updateResource(String id, Resource resource);

    /**
     * @param id       the identifier of the resource to remove
     * @param resource the resource to remove
     */
    void removeResource(String id, Resource resource);
}

View File

@ -0,0 +1,30 @@
/**
*
*/
package eu.dnetlib.common.profile;
/**
* @author jochen
*
*/
public class ProfileNotFoundException extends Exception {
/**
*
*/
private static final long serialVersionUID = -6272083305345284826L;
public ProfileNotFoundException(Throwable e) {
super(e);
}
public ProfileNotFoundException(String msg, Throwable e) {
super(msg, e);
}
public ProfileNotFoundException(String msg) {
super(msg);
}
}

View File

@ -0,0 +1,31 @@
package eu.dnetlib.common.profile;
import java.io.InputStream;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.DocumentHelper;
import org.dom4j.io.SAXReader;
/**
 * Concrete {@link DnetResource} that can be built from an existing document,
 * from XML text or from an XML input stream.
 *
 * @author jochen
 */
public class Resource extends DnetResource implements IResource {

    /** Creates a resource without a document. */
    public Resource() {
        super();
    }

    /**
     * @param resource the document to wrap
     */
    public Resource(Document resource) {
        super(resource);
    }

    /**
     * @param resourceProfile the XML text to parse into the wrapped document
     * @throws DocumentException if the text cannot be parsed
     */
    public Resource(String resourceProfile) throws DocumentException {
        super(DocumentHelper.parseText(resourceProfile));
    }

    /**
     * @param resourceProfileStream the stream to read the wrapped document from
     * @throws DocumentException if the stream content cannot be parsed
     */
    public Resource(InputStream resourceProfileStream) throws DocumentException {
        super(readDocument(resourceProfileStream));
    }

    /** Reads a document from the stream with a newly created SAX reader. */
    private static Document readDocument(InputStream stream) throws DocumentException {
        final SAXReader saxReader = new SAXReader();
        return saxReader.read(stream);
    }
}

View File

@ -0,0 +1,90 @@
/**
*
*/
package eu.dnetlib.common.profile;
import java.util.List;
import java.util.concurrent.TimeUnit;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpDocumentNotFoundException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
// import eu.dnetlib.enabling.locators.DefaultUniqueServiceLocator;
import eu.dnetlib.enabling.tools.ServiceLocator;
/**
 * {@link IResourceDaoSupport} implementation that keeps resource profiles in an
 * in-memory cache and loads missing entries through the lookup service.
 * Entries expire 24 hours after they were written.
 *
 * @author jochen
 */
public class ResourceCache implements IResourceDaoSupport {

    /** Cache keyed either by profile id or by query string (see the loader below). */
    private final LoadingCache<String, Resource> cache;

    @javax.annotation.Resource(name = "lookupLocator")
    private ServiceLocator<ISLookUpService> lookupLocator;

    /**
     * Sets up the cache together with its loader. The loader treats every key that
     * starts with {@code "collection"} as a query and every other key as a profile id.
     */
    public ResourceCache() {
        cache = CacheBuilder.newBuilder().expireAfterWrite(24, TimeUnit.HOURS).build(new CacheLoader<String, Resource>() {

            @Override
            public Resource load(String aKey) throws Exception {
                if (aKey.startsWith("collection")) {
                    return new Resource(byQuery(aKey));
                }
                return new Resource(byId(aKey));
            }

            private String byQuery(String aQuery) throws ISLookUpDocumentNotFoundException, ISLookUpException {
                return lookupLocator.getService().getResourceProfileByQuery(aQuery);
            }

            private String byId(String aId) throws ISLookUpDocumentNotFoundException, ISLookUpException {
                return lookupLocator.getService().getResourceProfile(aId);
            }
        });
    }

    /**
     * Not implemented: no lookup is performed.
     *
     * @param xquery ignored
     * @return always an empty list (never {@code null}), in line with the other
     *         implementation of this interface
     */
    @Override
    public List<Resource> getResources(String xquery) {
        // TODO not implemented yet
        return new java.util.LinkedList<Resource>();
    }

    /**
     * @param xquery the query used as cache key
     * @return the cached resource, loaded on demand
     * @throws Exception if the cache cannot provide the resource
     */
    @Override
    public Resource getResourceByXquery(String xquery) throws Exception {
        return cache.get(xquery);
    }

    /**
     * @param id the profile id used as cache key
     * @return the cached resource, loaded on demand
     * @throws Exception if the cache cannot provide the resource
     */
    @Override
    public Resource getResource(String id) throws Exception {
        return cache.get(id);
    }

    /** Not implemented: does nothing; in particular the cached entry is left untouched. */
    @Override
    public void updateResource(String id, Resource resource) {
        // TODO not implemented yet
    }

    /** Not implemented: does nothing; in particular the cached entry is left untouched. */
    @Override
    public void removeResource(String id, Resource resource) {
        // TODO not implemented yet
    }

    public void setLookupLocator(ServiceLocator<ISLookUpService> lookupLocator) {
        this.lookupLocator = lookupLocator;
    }

    public ServiceLocator<ISLookUpService> getLookupLocator() {
        return lookupLocator;
    }
}

View File

@ -0,0 +1,42 @@
package eu.dnetlib.common.profile;
import java.util.List;
/**
 * {@link IResourceDao} implementation that forwards every call to the
 * configured {@link IResourceDaoSupport}.
 */
public class ResourceDao implements IResourceDao {

    /** The back end all operations are forwarded to. */
    private IResourceDaoSupport daoSupport;

    public IResourceDaoSupport getDaoSupport() {
        return daoSupport;
    }

    public void setDaoSupport(IResourceDaoSupport daoSupport) {
        this.daoSupport = daoSupport;
    }

    @Override
    public List<Resource> getResources(String xquery) {
        return daoSupport.getResources(xquery);
    }

    @Override
    public Resource getResource(String id) throws Exception {
        return daoSupport.getResource(id);
    }

    /**
     * Forwards the query as an XQuery, the only query type supported at present.
     */
    @Override
    public Resource getResourceByQuery(String query) throws Exception {
        return daoSupport.getResourceByXquery(query);
    }

    @Override
    public void updateResource(String id, Resource resource) {
        daoSupport.updateResource(id, resource);
    }

    @Override
    public void removeResource(String id, Resource resource) {
        daoSupport.removeResource(id, resource);
    }
}

View File

@ -0,0 +1,119 @@
package eu.dnetlib.common.profile;
import java.util.LinkedList;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dom4j.DocumentException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpDocumentNotFoundException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import eu.dnetlib.enabling.is.registry.rmi.ISRegistryException;
import eu.dnetlib.enabling.is.registry.rmi.ISRegistryService;
import eu.dnetlib.enabling.tools.ServiceLocator;
/**
 * {@link IResourceDaoSupport} implementation that calls the lookup and registry
 * services on every request.
 *
 * @author jochen
 * @deprecated this class is deprecated, use ResourceCache instead.
 *
 */
@Deprecated
public class ResourceDaoRemoteSupport implements IResourceDaoSupport {

    private static final Log log = LogFactory.getLog(ResourceDaoRemoteSupport.class);

    @javax.annotation.Resource(name = "lookupLocator")
    private ServiceLocator<ISLookUpService> lookupLocator;

    @javax.annotation.Resource(name = "registryLocator")
    private ServiceLocator<ISRegistryService> registryLocator;

    /**
     * Runs the query against the lookup service and parses each returned profile.
     *
     * @param xquery the query to run
     * @return the parsed resources; never {@code null}. Failures are logged and end
     *         the processing, so the list may be empty or incomplete.
     */
    @Override
    public List<Resource> getResources(String xquery) {
        List<Resource> list = new LinkedList<Resource>();
        try {
            List<String> profileList = lookupLocator.getService().quickSearchProfile(xquery);
            if (profileList != null) {
                for (String profile : profileList) {
                    list.add(new Resource(profile));
                }
            }
        } catch (ISLookUpException e) {
            log.error("lookup failed for xquery: " + xquery, e);
        } catch (DocumentException e) {
            log.error("cannot parse a profile returned for xquery: " + xquery, e);
        }
        return list;
    }

    /**
     * @param xquery the query selecting a single profile
     * @return the parsed resource, or {@code null} if the lookup or the parsing failed
     *         (the failure is logged, not rethrown)
     */
    @Override
    public Resource getResourceByXquery(String xquery) {
        try {
            String profile = lookupLocator.getService().getResourceProfileByQuery(xquery);
            return new Resource(profile);
        } catch (ISLookUpDocumentNotFoundException e) {
            log.error("no profile found for xquery: " + xquery, e);
        } catch (ISLookUpException e) {
            log.error("lookup failed for xquery: " + xquery, e);
        } catch (DocumentException e) {
            log.error("cannot parse the profile returned for xquery: " + xquery, e);
        }
        return null;
    }

    /**
     * @param id the identifier of the requested profile
     * @return the parsed resource, or {@code null} if the lookup or the parsing failed
     *         (the failure is logged, not rethrown)
     */
    @Override
    public Resource getResource(String id) {
        try {
            String profile = lookupLocator.getService().getResourceProfile(id);
            return new Resource(profile);
        } catch (ISLookUpDocumentNotFoundException e) {
            log.error("no profile found for id: " + id, e);
        } catch (ISLookUpException e) {
            log.error("lookup failed for id: " + id, e);
        } catch (DocumentException e) {
            log.error("cannot parse the profile returned for id: " + id, e);
        }
        return null;
    }

    /** Not implemented: does nothing. */
    @Override
    public void removeResource(String id, Resource resource) {
        // TODO not implemented yet
    }

    /**
     * Sends the resource to the registry service. The identifier and the type passed
     * to the registry are read from the resource itself; the {@code id} argument is
     * not used.
     *
     * @param id       not used
     * @param resource the resource to store
     * @throws IllegalStateException if the registry service rejects the update
     */
    @Override
    public void updateResource(String id, Resource resource) {
        try {
            registryLocator.getService().updateProfile(resource.getValue("//RESOURCE_IDENTIFIER/@value"), resource.getResource().asXML(), resource.getValue("//RESOURCE_TYPE/@value"));
        } catch (ISRegistryException e) {
            log.error("cannot update profile.", e);
            throw new IllegalStateException("cannot update profile.", e);
        }
    }

    public void setRegistryLocator(ServiceLocator<ISRegistryService> registryLocator) {
        this.registryLocator = registryLocator;
    }

    public ServiceLocator<ISRegistryService> getRegistryLocator() {
        return registryLocator;
    }

    public void setLookupLocator(ServiceLocator<ISLookUpService> lookupLocator) {
        this.lookupLocator = lookupLocator;
    }

    public ServiceLocator<ISLookUpService> getLookupLocator() {
        return lookupLocator;
    }
}

View File

@ -0,0 +1,245 @@
/**
* Copyright 2008-2009 DRIVER PROJECT (ICM UW)
* Original author: Marek Horst
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package eu.dnetlib.common.utils;
import java.io.StringReader;
import java.util.Map;
import javax.xml.namespace.QName;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.ws.wsaddressing.W3CEndpointReference;
import javax.xml.ws.wsaddressing.W3CEndpointReferenceBuilder;
import org.apache.log4j.Logger;
import org.dom4j.io.DocumentResult;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
/**
 * Information Service utils class: reads values out of a W3C endpoint reference and
 * parses and builds ResultSet end point reference (EPR) strings.
 *
 * @author Marek Horst
 * @version 0.7.6
 *
 */
public class EprUtils {

    protected static final Logger log = Logger.getLogger(EprUtils.class);

    public static final String INDEX_RESULT_SET_NAME = "ICMResultSet";

    public static final String SERVICE_NAME = "IndexService";

    /** Qualified tag name of the element carrying the ResultSet identifier. */
    private static final String RESOURCE_IDENTIFIER_TAG = "ResourceIdentifier:ResourceIdentifier";

    /** Tag name of the element carrying the service address. */
    private static final String ADDRESS_TAG = "Address";

    /** Holds the document the endpoint reference was written to. */
    private final DocumentResult infoset = new DocumentResult();

    /**
     * Writes the endpoint reference into an internal document and installs the
     * namespace mapping in {@link XMLUtils}. Note that the mapping is stored in a
     * static field of {@link XMLUtils}, so it replaces the mapping used by every
     * other caller of that class as well.
     *
     * @param epr - W3CEndpoint reference
     * @param nsMap - mapping of namespace-prefix,uri pairs
     */
    public EprUtils(W3CEndpointReference epr, Map<String, String> nsMap) {
        epr.writeTo(infoset);
        XMLUtils.setNamespaces(nsMap);
    }

    /**
     * @param xpathExpr the XPath expression to evaluate against the endpoint reference
     * @return the value obtained by the xpath evaluation
     * @throws XMLException
     */
    public String getValue(String xpathExpr) throws XMLException {
        return XMLUtils.evaluate(infoset.getDocument(), xpathExpr);
    }

    /**
     * Parses ResultSetEPR to the String[] where:
     * String[0] - ResultSetService location,
     * String[1] - ResultSetId
     *
     * @param resultSetEPR the EPR as XML text
     * @return string array where: String[0] - ResultSetService location, String[1] - ResultSetId;
     *         {@code null} if the input is null or empty, if either element does not occur
     *         exactly once, or if parsing fails (failures are logged)
     */
    @Deprecated
    public static String[] parseResultSetEPR(String resultSetEPR) {
        if (resultSetEPR == null || resultSetEPR.length() == 0) {
            return null;
        }
        try {
            Element documentElement = parseDocumentElement(resultSetEPR);
            NodeList identifiers = documentElement.getElementsByTagName(RESOURCE_IDENTIFIER_TAG);
            if (identifiers.getLength() != 1) {
                log.error("Invalid notifications of nodes for driver:ResourceIdentifier element. Expected 1, found: " + identifiers.getLength());
                return null;
            }
            // NOTE(review): the previous code repeated this lookup with the identical tag
            // name when it did not yield exactly one node; the retry could never change the
            // outcome and was removed. Possibly a prefixed variant such as "wsa:Address" was
            // intended as fallback - confirm.
            NodeList addresses = documentElement.getElementsByTagName(ADDRESS_TAG);
            if (addresses.getLength() != 1) {
                log.error("Invalid notifications of nodes for Address element. Expected 1, found: " + addresses.getLength());
                return null;
            }
            return new String[] {
                getStringFromNode(addresses.item(0)),
                getStringFromNode(identifiers.item(0))
            };
        } catch (Exception e) {
            log.error("Exception occured when extracting ResultSet id from ResultSet service xml-type response!", e);
            return null;
        }
    }

    /**
     * Extracts ResultSet identifier from ResultSet xml-type response.
     *
     * @param sourceResultSetId the response as XML text
     * @return ResultSet identifier; {@code null} if the input is null or empty, if the
     *         identifier element does not occur exactly once, or if parsing fails
     *         (failures are logged)
     */
    public static String extractResultSetId(String sourceResultSetId) {
        if (sourceResultSetId == null || sourceResultSetId.length() == 0) {
            return null;
        }
        try {
            Element documentElement = parseDocumentElement(sourceResultSetId);
            NodeList identifiers = documentElement.getElementsByTagName(RESOURCE_IDENTIFIER_TAG);
            if (identifiers.getLength() != 1) {
                log.error("Invalid notifications of nodes for ResourceIdentifier:ResourceIdentifier element. Expected 1, found: " + identifiers.getLength());
                return null;
            }
            return getStringFromNode(identifiers.item(0));
        } catch (Exception e) {
            log.error("Exception occured when extracting ResultSet id from ResultSet service xml-type response!", e);
            return null;
        }
    }

    /**
     * Parses XML text with a non-validating, comment-ignoring parser and returns the
     * document element.
     */
    private static Element parseDocumentElement(String xml) throws Exception {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setIgnoringComments(true);
        factory.setValidating(false);
        DocumentBuilder db = factory.newDocumentBuilder();
        Document doc = db.parse(new InputSource(new StringReader(xml)));
        return doc.getDocumentElement();
    }

    /**
     * Returns the value of the first child of the given node. An element without
     * children makes this fail with a NullPointerException, which the callers in this
     * class catch and log.
     */
    private static String getStringFromNode(Node node) {
        // getTextContent() is deliberately avoided: the original author noted that it
        // may not work on some JDKs
        return node.getFirstChild().getNodeValue();
    }

    /**
     * Builds ResultSet end point reference for given serviceAddress and resultSetId.
     * The arguments are inserted verbatim, without XML escaping.
     * NOTE(review): the generated markup uses the ResourceIdentifier prefix without
     * declaring a namespace for it - confirm that consumers parse it namespace-unaware.
     *
     * @param serviceAddress the address written into the Address element
     * @param resultSetId the identifier written into the ResourceIdentifier element
     * @param wsdlLocation the value of the wsdlLocation attribute
     * @return resultSet EPR
     */
    public static String buildResultSetEPR(String serviceAddress, String resultSetId,
            String wsdlLocation) {
        StringBuilder epr = new StringBuilder();
        epr.append("<?xml version=\"1.0\" encoding=\"utf-8\"?>");
        epr.append("<wsa:EndpointReference xmlns:wsa=\"http://www.driver.org/schema\" xmlns:driver=\"http://www.driver.org\" xmlns:wsaw=\"http://www.w3.org/2006/02/addressing/wsdl\" xmlns:wsdl=\"http://www.w3.org/2005/08/wsdl-instance\">");
        epr.append("<Address>").append(serviceAddress).append("</Address>");
        epr.append("<wsa:ReferenceParameters>");
        epr.append("<ResourceIdentifier:ResourceIdentifier>");
        epr.append(resultSetId);
        epr.append("</ResourceIdentifier:ResourceIdentifier>");
        epr.append("</wsa:ReferenceParameters>");
        epr.append("<wsa:Metadata wsdl:wsdlLocation=\"").append(wsdlLocation).append("\">");
        epr.append("<wsaw:ServiceName>").append(INDEX_RESULT_SET_NAME).append("</wsaw:ServiceName>");
        epr.append("</wsa:Metadata>");
        epr.append("</wsa:EndpointReference>");
        return epr.toString();
    }

    /**
     * Builds a W3C end point reference for the given serviceAddress. No ResultSet
     * identifier is attached as reference parameter.
     *
     * @param serviceAddress the address of the endpoint
     * @param wsdlLocation the location of the WSDL document
     * @return W3C EPR
     * @throws ParserConfigurationException declared for compatibility; not thrown by the current implementation
     */
    public static W3CEndpointReference buildW3CEPR(
            String serviceAddress, String wsdlLocation) throws ParserConfigurationException {
        final W3CEndpointReferenceBuilder builder = new W3CEndpointReferenceBuilder();
        builder.address(serviceAddress);
        builder.serviceName(new QName("http://www.w3.org/2006/02/addressing/wsdl", SERVICE_NAME));
        builder.endpointName(new QName("http://www.driver.org/schema", SERVICE_NAME));
        builder.wsdlDocumentLocation(wsdlLocation);
        return builder.build();
    }
}

View File

@ -0,0 +1,19 @@
package eu.dnetlib.common.utils;
public class XMLException extends Exception {
static final long serialVersionUID = 2413331108861490367L;
public XMLException(String errorMessage){
super(errorMessage);
}
public XMLException(Exception exc){
super(exc);
}
public XMLException(String errorMessage, Throwable e){
super(errorMessage, e);
}
}

View File

@ -0,0 +1,45 @@
package eu.dnetlib.common.utils;
import java.io.StringWriter;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBElement;
import javax.xml.bind.JAXBException;
import javax.xml.bind.Marshaller;
import javax.xml.bind.annotation.XmlRootElement;
import javax.xml.namespace.QName;
/**
 * Serializes instances of one JAXB-annotated class to XML text without an XML
 * declaration.
 * NOTE(review): a single marshaller instance is reused for all calls; JAXB
 * marshallers are generally not safe for concurrent use - confirm how instances of
 * this class are shared.
 *
 * @param <T> the type of the records to serialize
 */
public class XMLSerializer<T> {

    private Marshaller marshaller;

    private final Class<T> clazz;

    /**
     * @param clazz the class of the records to serialize
     * @throws IllegalArgumentException if no JAXB marshaller can be set up for the class
     */
    public XMLSerializer(Class<T> clazz) {
        this.clazz = clazz;
        try {
            init();
        } catch (JAXBException e) {
            throw new IllegalArgumentException(e);
        }
    }

    /**
     * Creates the JAXB context and the marshaller, and switches off the XML declaration.
     *
     * @throws JAXBException if the context or the marshaller cannot be created or configured
     */
    protected void init() throws JAXBException {
        Class<?>[] all = { this.clazz };
        JAXBContext context = JAXBContext.newInstance(all);
        marshaller = context.createMarshaller();
        marshaller.setProperty("com.sun.xml.bind.xmlDeclaration", false);
    }

    /**
     * @param record the record to serialize
     * @return the record as XML text
     * @throws JAXBException if marshalling fails
     * @throws IllegalStateException if the record class carries no {@link XmlRootElement} annotation
     */
    public String getAsXml(T record) throws JAXBException {
        final StringWriter buffer = new StringWriter();
        marshaller.marshal(createElement(record), buffer);
        return buffer.toString();
    }

    /**
     * Wraps the value in a JAXB element named after the {@link XmlRootElement}
     * annotation of the record class.
     *
     * @param value the value to wrap
     * @return the JAXB element
     * @throws IllegalStateException if the record class carries no {@link XmlRootElement} annotation
     */
    protected JAXBElement<T> createElement(final T value) {
        final XmlRootElement annotation = this.clazz.getAnnotation(XmlRootElement.class);
        if (annotation == null) {
            // fail with a meaningful message instead of a bare NullPointerException
            throw new IllegalStateException("class " + this.clazz.getName() + " is not annotated with @XmlRootElement");
        }
        return new JAXBElement<T>(new QName(annotation.namespace(), annotation.name()), this.clazz, null, value);
    }
}

View File

@ -0,0 +1,54 @@
package eu.dnetlib.common.utils;
import java.io.StringReader;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.DocumentHelper;
import org.dom4j.Node;
import org.dom4j.XPath;
import org.dom4j.io.SAXReader;
/**
 * Static helpers for parsing XML text and evaluating XPath expressions with dom4j.
 * The namespace mapping used for XPath evaluation is held in a static field and is
 * therefore shared by all callers.
 */
public class XMLUtils {

    /** Prefix-to-URI mapping applied to the XPath expressions created by this class. */
    private static Map<String, String> nsMap = new HashMap<String, String>();

    /**
     * Parses XML text into a document.
     *
     * @param document the XML text
     * @return the parsed document
     * @throws XMLException if the text cannot be parsed
     */
    public static Document getDocument(String document) throws XMLException {
        try {
            // a reader per call: a SAXReader shared through a static field must not be
            // used by several threads at once
            return new SAXReader().read(new StringReader(document));
        } catch (DocumentException e) {
            throw new XMLException(e);
        }
    }

    /**
     * Replaces the namespace mapping used for subsequent XPath evaluations.
     *
     * @param nsMap mapping of namespace prefixes to URIs; {@code null} resets the
     *              mapping to an empty one
     */
    public static void setNamespaces(Map<String, String> nsMap) {
        XMLUtils.nsMap = (nsMap != null) ? nsMap : new HashMap<String, String>();
    }

    /**
     * Parses the XML text and evaluates the XPath expression against it.
     *
     * @param document   the XML text
     * @param expression the XPath expression
     * @return the string value of the expression
     * @throws XMLException if the text cannot be parsed
     */
    public static String evaluate(String document, String expression) throws XMLException {
        return createXPath(expression).valueOf(getDocument(document));
    }

    /**
     * Evaluates the XPath expression against the given node.
     *
     * @param document   the context node
     * @param expression the XPath expression
     * @return the string value of the expression
     * @throws XMLException declared for compatibility; not thrown by the current implementation
     */
    public static String evaluate(Node document, String expression) throws XMLException {
        return createXPath(expression).valueOf(document);
    }

    /**
     * Selects the nodes matching the XPath expression, in the order the selection
     * yields them.
     *
     * @param document   the context node
     * @param expression the XPath expression
     * @return the selected nodes
     * @throws XMLException declared for compatibility; not thrown by the current implementation
     */
    @SuppressWarnings("unchecked")
    public static List<Node> getNodes(Node document, String expression) throws XMLException {
        // single-argument selectNodes on purpose: the two-argument overload takes a
        // second XPath as sort criterion and would reorder the result
        return createXPath(expression).selectNodes(document);
    }

    /**
     * Selects a single node through the node's own XPath support; the namespace
     * mapping of this class is not applied here.
     *
     * @param document   the context node
     * @param expression the XPath expression
     * @return the result of {@code selectSingleNode} on the context node
     */
    public static Node getNode(Node document, String expression) {
        return document.selectSingleNode(expression);
    }

    /** Creates an XPath for the expression with the current namespace mapping applied. */
    private static XPath createXPath(String expression) {
        XPath xpath = DocumentHelper.createXPath(expression);
        xpath.setNamespaceURIs(XMLUtils.nsMap);
        return xpath;
    }
}

View File

@ -0,0 +1,20 @@
/**
*
*/
package eu.dnetlib.data.collective.transformation;
import java.util.List;
/**
 * Access to a database by means of SQL queries.
 *
 * @author jochen
 */
public interface IDatabaseConnector {

    /**
     * Executes a SQL query.
     *
     * @param aSQLquery the SQL query to execute
     * @return List containing the results of this query execution
     * @throws TransformationException if the results cannot be provided
     */
    List<String> getResult(String aSQLquery) throws TransformationException;
}

View File

@ -0,0 +1,44 @@
/**
*
*/
package eu.dnetlib.data.collective.transformation;
/**
 * Checked exception of the transformation package.
 *
 * @author jochen
 */
public class TransformationException extends Exception {

    /** Explicit serialization version, as declared by the other exception types of this code base. */
    private static final long serialVersionUID = 1L;

    /**
     * Creates an exception without detail message and cause.
     */
    public TransformationException() {
        super();
    }

    /**
     * @param message the detail message
     */
    public TransformationException(String message) {
        super(message);
    }

    /**
     * @param cause the underlying cause
     */
    public TransformationException(Throwable cause) {
        super(cause);
    }

    /**
     * @param message the detail message
     * @param cause   the underlying cause
     */
    public TransformationException(String message, Throwable cause) {
        super(message, cause);
    }
}

View File

@ -0,0 +1,38 @@
package eu.dnetlib.data.collective.transformation;
import java.util.Map;
import org.svenson.JSONProperty;
import org.svenson.JSONTypeHint;
import eu.dnetlib.data.collective.transformation.engine.functions.Vocabulary;
/**
 * Bean holding vocabularies by name; the JSON annotations on the accessors
 * configure its JSON mapping.
 */
public class VocabularyMap {

    /** Vocabularies by key; {@code null} until {@link #setMap(Map)} is called. */
    private Map<String, Vocabulary> map;

    /**
     * Returns true if the vocabulary map contains the key argument. Method implemented for backward compatibility.
     *
     * @param aKey vocabulary name as a key
     * @return true if key exist else false; also false while no map has been set
     */
    public boolean containsKey(String aKey) {
        // the map is legitimately null before setMap is called (see ignoreIfNull on getMap)
        return map != null && map.containsKey(aKey);
    }

    /**
     * @return the map
     */
    @JSONProperty(ignoreIfNull = true)
    public Map<String, Vocabulary> getMap() {
        return map;
    }

    /**
     * @param map the map to set
     */
    @JSONTypeHint(Vocabulary.class)
    public void setMap(Map<String, Vocabulary> map) {
        this.map = map;
    }
}

View File

@ -0,0 +1,96 @@
package eu.dnetlib.data.collective.transformation;
import javax.annotation.Resource;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dom4j.DocumentException;
import eu.dnetlib.data.collective.transformation.engine.functions.DateVocabulary;
import eu.dnetlib.data.collective.transformation.engine.functions.IVocabulary;
import eu.dnetlib.data.collective.transformation.engine.functions.PersonVocabulary;
// import eu.dnetlib.data.collective.transformation.engine.functions.PmcVocabulary;
import eu.dnetlib.data.collective.transformation.engine.functions.Vocabulary;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpDocumentNotFoundException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.locators.DefaultUniqueServiceLocator;
/**
 * Registry of vocabularies. On initialization every configured vocabulary gets its
 * profile from the lookup service, and the built-in date and person vocabularies are
 * registered. Initialization happens on first access through
 * {@link #getVocabulary(String)} or {@link #getVocabularies()} unless {@link #init()}
 * was called before; no synchronization is performed here.
 *
 * @author jochen
 */
public class VocabularyRegistry {

    private static final Log log = LogFactory.getLog(VocabularyRegistry.class);

    private static final String dateVocabularyName = "DateISO8601";

    // private static final String pmcVocabularyName = "PMC";
    private static final String personVocabularyName = "Person";

    @Resource
    private DefaultUniqueServiceLocator uniqueServiceLocator;

    private VocabularyMap vocabularies;

    private boolean isInitialized = false;

    /**
     * Loads the profile of every vocabulary currently in the map and then adds the
     * date and person vocabularies.
     *
     * @throws IllegalStateException if a profile is not found, the lookup fails or a
     *                               profile cannot be parsed
     */
    public void init() {
        String targetVocabulary = "";
        for (java.util.Map.Entry<String, Vocabulary> entry : vocabularies.getMap().entrySet()) {
            final String key = entry.getKey();
            try {
                final Vocabulary vocabulary = entry.getValue();
                targetVocabulary = vocabulary.getName();
                final String profile = uniqueServiceLocator.getIsLookupService().getResourceProfileByQuery(profileQuery(targetVocabulary));
                vocabulary.setResource(new eu.dnetlib.common.profile.Resource(profile));
            } catch (ISLookUpDocumentNotFoundException e) {
                throw new IllegalStateException("vocabulary profile not found for name or code " + targetVocabulary, e);
            } catch (ISLookUpException e) {
                log.fatal("ISLookupException in VocabularyRegistry, key = " + key + " : ", e);
                throw new IllegalStateException(e);
            } catch (DocumentException e) {
                log.fatal("DocumentException in VocabularyRegistry, key = " + key + " : ", e);
                throw new IllegalStateException(e);
            }
        }
        // built-in vocabularies are added after the loop, so no profile is looked up for them
        vocabularies.getMap().put(dateVocabularyName, new DateVocabulary());
        vocabularies.getMap().put(personVocabularyName, new PersonVocabulary());
        // PmcVocabulary pmcVocab = new PmcVocabulary();
        // pmcVocab.setMappingFile(mappingFile);
        // vocabularies.getMap().put(pmcVocabularyName, pmcVocab);
        isInitialized = true;
        log.info("VocabularyRegistry is initialized.");
    }

    /** Builds the query selecting the vocabulary profile whose name or code equals the argument. */
    private static String profileQuery(final String nameOrCode) {
        final String vocabularyQueryPrefix = "collection('/db/DRIVER/VocabularyDSResources/VocabularyDSResourceType')//RESOURCE_PROFILE";
        return vocabularyQueryPrefix + "[.//VOCABULARY_NAME='" + nameOrCode + "' or .//VOCABULARY_NAME/@code='" + nameOrCode + "'] ";
    }

    /**
     * @param aVocabularyName the key of the requested vocabulary
     * @return the vocabulary stored under the key, or {@code null} if there is none
     */
    public IVocabulary getVocabulary(final String aVocabularyName) {
        ensureInitialized();
        return vocabularies.getMap().get(aVocabularyName);
    }

    /**
     * @return the vocabulary map, initialized on first access
     */
    public VocabularyMap getVocabularies() {
        ensureInitialized();
        return vocabularies;
    }

    /** Runs {@link #init()} unless a previous run completed. */
    private void ensureInitialized() {
        if (isInitialized) {
            return;
        }
        init();
    }

    public void setVocabularies(final VocabularyMap vocabularies) {
        this.vocabularies = vocabularies;
    }

    public void addVocabulary(final String aVocabularyName, final Vocabulary aVocabulary) {
        this.vocabularies.getMap().put(aVocabularyName, aVocabulary);
    }

    public void removeVocabulary(final String aVocabulary) {
        this.vocabularies.getMap().remove(aVocabulary);
    }

    public DefaultUniqueServiceLocator getUniqueServiceLocator() {
        return uniqueServiceLocator;
    }

    public void setUniqueServiceLocator(final DefaultUniqueServiceLocator uniqueServiceLocator) {
        this.uniqueServiceLocator = uniqueServiceLocator;
    }
}

View File

@ -0,0 +1,25 @@
/**
*
*/
package eu.dnetlib.data.collective.transformation;
import java.beans.PropertyEditorSupport;
import org.svenson.JSONParser;
/**
 * Property editor that turns JSON text into a {@link VocabularyMap}.
 *
 * @author js
 */
public class VocabularyTypeEditor extends PropertyEditorSupport {

    /**
     * Parses the given JSON text into a {@link VocabularyMap} and stores it as the
     * property value. May raise java.lang.IllegalArgumentException if either the String is
     * badly formatted or if this kind of property can't be expressed as text.
     *
     * @see java.beans.PropertyEditorSupport#setAsText(java.lang.String)
     */
    @Override
    public void setAsText(String aJsonString) throws IllegalArgumentException {
        final JSONParser parser = JSONParser.defaultJSONParser();
        final VocabularyMap parsed = parser.parse(VocabularyMap.class, aJsonString);
        setValue(parsed);
    }
}

View File

@ -0,0 +1,47 @@
/**
*
*/
package eu.dnetlib.data.collective.transformation.core.schema;
/**
 * Mutable pair of a namespace prefix and a namespace URI.
 *
 * @author jochen
 */
public class Namespace {

    String prefix;
    String uri;

    /**
     * @param aPrefix the prefix of the namespace
     * @param aUri    the URI of the namespace
     */
    public Namespace(String aPrefix, String aUri) {
        prefix = aPrefix;
        uri = aUri;
    }

    /** @return the prefix */
    public String getPrefix() {
        return this.prefix;
    }

    /** @param prefix the prefix to set */
    public void setPrefix(String prefix) {
        this.prefix = prefix;
    }

    /** @return the uri */
    public String getUri() {
        return this.uri;
    }

    /** @param uri the uri to set */
    public void setUri(String uri) {
        this.uri = uri;
    }
}

View File

@ -0,0 +1,42 @@
/**
*
*/
package eu.dnetlib.data.collective.transformation.core.schema;
/**
 * Bean describing an attribute by its name and a required flag.
 *
 * @author jochen
 */
public class SchemaAttribute {

    private String name;
    private boolean required;

    /** @param name the name to set */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the name */
    public String getName() {
        return this.name;
    }

    /** @param required the required flag to set */
    public void setRequired(boolean required) {
        this.required = required;
    }

    /** @return the required flag */
    public boolean isRequired() {
        return this.required;
    }
}

View File

@ -0,0 +1,160 @@
package eu.dnetlib.data.collective.transformation.core.schema;
import java.util.LinkedList;
import java.util.List;
/**
 * Bean describing an element: its name and namespace, occurrence bounds, flags,
 * child elements and attributes.
 *
 * @author jochen
 */
public class SchemaElement {

    private String targetNamespace;
    private String name;
    private boolean isRepeatable;
    private boolean isRoot;
    private boolean containsSimpleType;
    private int minOccurs;
    private int maxOccurs;
    private List<SchemaElement> childList = new LinkedList<SchemaElement>();
    private List<SchemaAttribute> attributeList = new LinkedList<SchemaAttribute>();
    private Namespace namespace;

    // --- naming -----------------------------------------------------------

    /** @return the name of this element */
    public String getName() {
        return this.name;
    }

    /** @param name the name of the element to set */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the targetNamespace */
    public String getTargetNamespace() {
        return this.targetNamespace;
    }

    /** @param targetNamespace the targetNamespace to set */
    public void setTargetNamespace(String targetNamespace) {
        this.targetNamespace = targetNamespace;
    }

    /** @return the namespace */
    public Namespace getNamespace() {
        return this.namespace;
    }

    /** @param namespace the namespace to set */
    public void setNamespace(Namespace namespace) {
        this.namespace = namespace;
    }

    // --- occurrence -------------------------------------------------------

    /** @return the minOccurs */
    public int getMinOccurs() {
        return this.minOccurs;
    }

    /** @param minOccurs the minOccurs to set */
    public void setMinOccurs(int minOccurs) {
        this.minOccurs = minOccurs;
    }

    /** @return the maxOccurs */
    public int getMaxOccurs() {
        return this.maxOccurs;
    }

    /** @param maxOccurs the maxOccurs to set */
    public void setMaxOccurs(int maxOccurs) {
        this.maxOccurs = maxOccurs;
    }

    /** @return true if minOccurs is greater than zero, false otherwise */
    public boolean isMandatory() {
        return this.minOccurs > 0;
    }

    /** @return the isRepeatable flag */
    public boolean isRepeatable() {
        return this.isRepeatable;
    }

    /** @param isRepeatable the isRepeatable flag to set */
    public void setRepeatable(boolean isRepeatable) {
        this.isRepeatable = isRepeatable;
    }

    // --- flags ------------------------------------------------------------

    /** @return the isRoot flag */
    public boolean isRoot() {
        return this.isRoot;
    }

    /** @param isRoot the isRoot flag to set */
    public void setRoot(boolean isRoot) {
        this.isRoot = isRoot;
    }

    /** @return the containsSimpleType flag */
    public boolean containsSimpleType() {
        return this.containsSimpleType;
    }

    /**
     * @param containsSimpleType true if this element contains a simpleType, false otherwise
     */
    public void setContainsSimpleType(boolean containsSimpleType) {
        this.containsSimpleType = containsSimpleType;
    }

    // --- children and attributes -------------------------------------------

    /** @return the childList (the internal list itself, not a copy) */
    public List<SchemaElement> getChildList() {
        return this.childList;
    }

    /** @param childList the childList to set */
    public void setChildList(List<SchemaElement> childList) {
        this.childList = childList;
    }

    /** @return the attributeList (the internal list itself, not a copy) */
    public List<SchemaAttribute> getAttributeList() {
        return this.attributeList;
    }

    /** @param aAttribute the attribute to append to the attributeList */
    public void addAttribute(SchemaAttribute aAttribute) {
        this.attributeList.add(aAttribute);
    }
}

View File

@ -0,0 +1,98 @@
package eu.dnetlib.data.collective.transformation.core.schema;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.List;
import org.xml.sax.SAXException;
import com.sun.xml.xsom.XSContentType;
import com.sun.xml.xsom.XSElementDecl;
import com.sun.xml.xsom.XSParticle;
import com.sun.xml.xsom.XSSchemaSet;
import com.sun.xml.xsom.XSTerm;
import com.sun.xml.xsom.parser.XSOMParser;
import eu.dnetlib.data.collective.transformation.core.schema.visitor.Visitor;
/**
 * Inspects an XML Schema with the XSOM parser and collects the schema elements that
 * the schema visitor finds in the content model of a given root element.
 *
 * @author jochen
 */
public class SchemaInspector {

    /** elements collected by the last successful inspection; empty before that */
    private List<SchemaElement> elementList = new java.util.LinkedList<SchemaElement>();
    /** set to true once an inspection has completed */
    private boolean inspected = false;
    /** name of the root element passed to the last inspection */
    private String rootElement;

    /**
     * Parses the schema stored in a file and inspects it.
     *
     * @param aSchema the schema file to parse
     * @param aRootElement name of the root element, looked up in the empty namespace
     * @throws SAXException if parsing fails or the parser delivers no schema set
     * @throws IOException declared by the file based parse call
     * @throws IllegalStateException if the root element is not declared in the schema
     */
    public void inspect(File aSchema, String aRootElement) throws SAXException, IOException {
        XSOMParser parser = new XSOMParser();
        parser.parse(aSchema);
        doInspect(parser, aRootElement);
    }

    /**
     * Parses the schema located at a URL and inspects it.
     *
     * @param aSchema the location of the schema to parse
     * @param aRootElement name of the root element, looked up in the empty namespace
     * @throws SAXException if parsing fails or the parser delivers no schema set
     * @throws IllegalStateException if the root element is not declared in the schema
     */
    public void inspect(URL aSchema, String aRootElement) throws SAXException {
        XSOMParser parser = new XSOMParser();
        parser.parse(aSchema);
        doInspect(parser, aRootElement);
    }

    /**
     * Inspects the parsed schema and creates a new list of schema elements.
     *
     * @param parser the parser that has already parsed the schema
     * @param aRootElement name of the root element, looked up in the empty namespace
     * @throws SAXException if the parser delivers no schema set
     * @throws IllegalStateException if the root element is not declared in the schema
     */
    private void doInspect(XSOMParser parser, String aRootElement) throws SAXException {
        this.rootElement = aRootElement;
        Visitor visitor = new Visitor();
        XSSchemaSet sset = parser.getResult();
        if (sset == null) {
            // getResult() hands back null when the schema had parse errors; fail with the
            // declared exception instead of a NullPointerException on the next line
            throw new SAXException("schema could not be parsed, unable to inspect rootElement " + aRootElement);
        }
        XSElementDecl elemDecl = sset.getElementDecl("", aRootElement);
        if (elemDecl == null) {
            throw new IllegalStateException("rootElement " + aRootElement + " not found in schema.");
        }
        // assuming the root element is of complex type; any other type leaves the element list empty
        if (elemDecl.getType().isComplexType()) {
            XSContentType contentType = elemDecl.getType().asComplexType().getContentType();
            XSParticle particle = contentType.asParticle();
            if (particle != null) {
                XSTerm term = particle.getTerm();
                term.visit(visitor);
            }
        }
        this.elementList = visitor.getElements();
        this.inspected = true;
    }

    /**
     * @return {@code true} once an inspection has completed
     */
    public boolean isInspected() {
        return inspected;
    }

    /**
     * @return the root element name passed to the last inspection
     */
    public String getRootElement() {
        return rootElement;
    }

    /**
     * @return the elements collected by the visitor during the last completed inspection
     */
    public List<SchemaElement> getChildElements() {
        return elementList;
    }
}

View File

@ -0,0 +1,168 @@
package eu.dnetlib.data.collective.transformation.core.schema.visitor;
import java.util.LinkedList;
import java.util.List;
import org.apache.commons.lang3.NotImplementedException;
//import org.apache.commons.logging.Log;
//import org.apache.commons.logging.LogFactory;
import com.sun.xml.xsom.XSAnnotation;
import com.sun.xml.xsom.XSAttGroupDecl;
import com.sun.xml.xsom.XSAttributeDecl;
import com.sun.xml.xsom.XSAttributeUse;
import com.sun.xml.xsom.XSComplexType;
import com.sun.xml.xsom.XSContentType;
import com.sun.xml.xsom.XSElementDecl;
import com.sun.xml.xsom.XSFacet;
import com.sun.xml.xsom.XSIdentityConstraint;
import com.sun.xml.xsom.XSModelGroup;
import com.sun.xml.xsom.XSModelGroupDecl;
import com.sun.xml.xsom.XSNotation;
import com.sun.xml.xsom.XSParticle;
import com.sun.xml.xsom.XSSchema;
import com.sun.xml.xsom.XSSimpleType;
import com.sun.xml.xsom.XSType;
import com.sun.xml.xsom.XSWildcard;
import com.sun.xml.xsom.XSXPath;
import com.sun.xml.xsom.visitor.XSVisitor;
import eu.dnetlib.data.collective.transformation.core.schema.SchemaAttribute;
import eu.dnetlib.data.collective.transformation.core.schema.SchemaElement;
/**
 * XSOM visitor that turns the particles of a content model into {@link SchemaElement}
 * objects. Only model groups, particles, local element declarations and complex types
 * derived by restriction are handled; every other schema component raises a
 * {@link NotImplementedException}.
 *
 * @author jochen
 */
public class Visitor implements XSVisitor {

    /** one entry per particle visited through {@link #particle(XSParticle)} */
    List<SchemaElement> schemaElements = new LinkedList<SchemaElement>();
    /** the element created by the most recent call to {@link #particle(XSParticle)} */
    SchemaElement currentElement;
    /** not assigned anywhere in this class */
    SchemaAttribute currentAttribute;

    @Override
    public void annotation(XSAnnotation arg0) {
        throw new NotImplementedException("TODO: annotation");
    }

    @Override
    public void attGroupDecl(XSAttGroupDecl arg0) {
        throw new NotImplementedException("TODO attGroupDecl");
    }

    /**
     * Not implemented yet: records the attribute name on the current attribute when one
     * is available, then always throws {@link NotImplementedException}.
     */
    @Override
    public void attributeDecl(XSAttributeDecl aAttributeDecl) {
        // currentAttribute is never assigned inside this class; without the guard a
        // NullPointerException would mask the intended NotImplementedException
        if (currentAttribute != null) {
            currentAttribute.setName(aAttributeDecl.getName());
        }
        throw new NotImplementedException("TODO attributeDecl");
    }

    @Override
    public void attributeUse(XSAttributeUse aAttributeUse) {
        throw new NotImplementedException("TODO attributeUse");
    }

    /**
     * Visits the content type of a complex type derived by restriction.
     *
     * @throws NotImplementedException for any other derivation method
     */
    @Override
    public void complexType(XSComplexType aType) {
        if (aType.getDerivationMethod() == XSType.RESTRICTION) {
            XSContentTypeVisitorImpl contentTypeVisitor = new XSContentTypeVisitorImpl();
            contentTypeVisitor.setVisitor(this);
            aType.getContentType().visit(contentTypeVisitor);
        } else {
            throw new NotImplementedException("visiting types other then 'RESTRICTION are not implemented'");
        }
    }

    @Override
    public void facet(XSFacet arg0) {
        throw new NotImplementedException("TODO facet");
    }

    @Override
    public void identityConstraint(XSIdentityConstraint arg0) {
        throw new NotImplementedException("TODO identityConstraint");
    }

    @Override
    public void notation(XSNotation arg0) {
        throw new NotImplementedException("TODO notation");
    }

    @Override
    public void schema(XSSchema arg0) {
        throw new NotImplementedException("TODO schema");
    }

    @Override
    public void xpath(XSXPath arg0) {
        throw new NotImplementedException("TODO xpath");
    }

    /**
     * Completes the current element with the name and target namespace of the
     * declaration and descends into its type. Declarations whose type is not local
     * are left untouched.
     */
    @Override
    public void elementDecl(XSElementDecl aElementDecl) {
        XSType type = aElementDecl.getType();
        if (type.isLocal()) {
            // complete infos about the current element
            currentElement.setName(aElementDecl.getName());
            currentElement.setTargetNamespace(aElementDecl.getTargetNamespace());
            type.visit(this);
        }
    }

    /**
     * Visits every child particle of the group, i.e. the group of elements that are
     * children of the root element.
     */
    @Override
    public void modelGroup(XSModelGroup aGroup) {
        for (XSParticle p : aGroup.getChildren()) {
            particle(p);
        }
    }

    @Override
    public void modelGroupDecl(XSModelGroupDecl arg0) {
        throw new NotImplementedException("TODO modelGroupDecl");
    }

    @Override
    public void wildcard(XSWildcard arg0) {
        throw new NotImplementedException("TODO wildcard");
    }

    @Override
    public void empty(XSContentType arg0) {
        throw new NotImplementedException("TODO empty");
    }

    /**
     * Creates a new schema element for the particle, appends it to the list of schema
     * elements, makes it the current element and visits the particle's term.
     */
    @Override
    public void particle(XSParticle aParticle) {
        SchemaElement element = new SchemaElement();
        element.setMinOccurs(aParticle.getMinOccurs().intValue());
        element.setMaxOccurs(aParticle.getMaxOccurs().intValue());
        element.setRepeatable(aParticle.isRepeated());
        schemaElements.add(element);
        currentElement = element;
        XSTermVisitorImpl termVisitor = new XSTermVisitorImpl();
        termVisitor.setVisitor(this);
        aParticle.getTerm().visit(termVisitor);
    }

    @Override
    public void simpleType(XSSimpleType arg0) {
        throw new NotImplementedException("TODO simpleType");
    }

    /**
     * @return the schema elements collected so far (the internal list, not a copy)
     */
    public List<SchemaElement> getElements() {
        return this.schemaElements;
    }

    /**
     * @return the element created by the most recent particle visit, or {@code null} if none was visited yet
     */
    protected SchemaElement getCurrentElement() {
        return currentElement;
    }
}

View File

@ -0,0 +1,82 @@
package eu.dnetlib.data.collective.transformation.core.schema.visitor;
import java.util.Collection;
import java.util.Iterator;
import org.apache.commons.lang3.NotImplementedException;
import com.sun.xml.xsom.XSAttributeUse;
import com.sun.xml.xsom.XSContentType;
import com.sun.xml.xsom.XSElementDecl;
import com.sun.xml.xsom.XSParticle;
import com.sun.xml.xsom.XSSimpleType;
import com.sun.xml.xsom.XSType;
import com.sun.xml.xsom.visitor.XSContentTypeVisitor;
import eu.dnetlib.data.collective.transformation.core.schema.SchemaAttribute;
import eu.dnetlib.data.collective.transformation.core.schema.SchemaElement;
/**
 * Content type visitor that describes the element declarations found in a content
 * model and registers them as children of the owning visitor's current element.
 *
 * @author jochen
 */
public class XSContentTypeVisitorImpl implements XSContentTypeVisitor {

    /** the schema visitor whose current element receives the children */
    private Visitor visitor;

    @Override
    public void empty(XSContentType arg0) {
        throw new NotImplementedException("TODO empty");
    }

    /**
     * Visits the term of the particle and, if the term is an element declaration,
     * adds a description of it (name, target namespace, occurrences, declared
     * attributes) to the child list of the visitor's current element.
     */
    @Override
    public void particle(XSParticle aParticle) {
        XSTermVisitorImpl termVisitor = new XSTermVisitorImpl();
        termVisitor.setVisitor(this.visitor);
        aParticle.getTerm().visit(termVisitor);

        // only element declarations are turned into child elements
        if (!aParticle.getTerm().isElementDecl()) {
            return;
        }

        XSElementDecl declaration = aParticle.getTerm().asElementDecl();
        SchemaElement child = new SchemaElement();
        XSType declaredType = declaration.getType();

        if (declaredType.isComplexType()) {
            Collection<? extends XSAttributeUse> declaredUses =
                    declaredType.asComplexType().getDeclaredAttributeUses();
            for (Iterator<? extends XSAttributeUse> uses = declaredUses.iterator(); uses.hasNext();) {
                XSAttributeUse use = uses.next();
                SchemaAttribute attribute = new SchemaAttribute();
                attribute.setName(use.getDecl().getName());
                attribute.setRequired(use.isRequired());
                child.addAttribute(attribute);
            }
        }

        child.setName(declaration.getName());
        child.setTargetNamespace(declaration.getTargetNamespace());
        child.setMinOccurs(aParticle.getMinOccurs().intValue());
        child.setMaxOccurs(aParticle.getMaxOccurs().intValue());
        child.setRepeatable(aParticle.isRepeated());

        // a complex type whose content type is a simple type marks the child accordingly
        if (declaredType.isComplexType()
                && declaredType.asComplexType().getContentType().asSimpleType() != null) {
            child.setContainsSimpleType(true);
        }

        this.visitor.getCurrentElement().getChildList().add(child);
    }

    @Override
    public void simpleType(XSSimpleType arg0) {
        throw new NotImplementedException("TODO simpleType");
    }

    /**
     * @param visitor the schema visitor to report to
     */
    public void setVisitor(Visitor visitor) {
        this.visitor = visitor;
    }

    /**
     * @return the schema visitor this instance reports to
     */
    public Visitor getVisitor() {
        return this.visitor;
    }
}

View File

@ -0,0 +1,56 @@
package eu.dnetlib.data.collective.transformation.core.schema.visitor;
import org.apache.commons.lang3.NotImplementedException;
import com.sun.xml.xsom.XSElementDecl;
import com.sun.xml.xsom.XSModelGroup;
import com.sun.xml.xsom.XSModelGroupDecl;
import com.sun.xml.xsom.XSParticle;
import com.sun.xml.xsom.XSWildcard;
import com.sun.xml.xsom.visitor.XSTermVisitor;
/**
 * Term visitor that forwards local element declarations to the schema visitor and
 * walks the particles of model groups.
 *
 * @author jochen
 */
public class XSTermVisitorImpl implements XSTermVisitor {

    /** the schema visitor that receives local element declarations */
    private Visitor visitor;

    /**
     * Hands a local element declaration over to the schema visitor.
     * Non local element declarations are ignored.
     */
    @Override
    public void elementDecl(XSElementDecl aElementDecl) {
        if (!aElementDecl.isLocal()) {
            // ignore non local element declarations
            return;
        }
        visitor.elementDecl(aElementDecl);
    }

    /**
     * Passes every child particle of the group to a fresh content type visitor that
     * reports to the same schema visitor.
     */
    @Override
    public void modelGroup(XSModelGroup aModelGroup) {
        XSContentTypeVisitorImpl particleVisitor = new XSContentTypeVisitorImpl();
        particleVisitor.setVisitor(visitor);
        for (XSParticle child : aModelGroup.getChildren()) {
            particleVisitor.particle(child);
        }
    }

    @Override
    public void modelGroupDecl(XSModelGroupDecl arg0) {
        throw new NotImplementedException("TODO modelGroupDecl");
    }

    @Override
    public void wildcard(XSWildcard arg0) {
        throw new NotImplementedException("TODO wildcard");
    }

    /**
     * @param visitor the schema visitor to forward to
     */
    public void setVisitor(Visitor visitor) {
        this.visitor = visitor;
    }

    /**
     * @return the schema visitor this instance forwards to
     */
    public Visitor getVisitor() {
        return this.visitor;
    }
}

View File

@ -0,0 +1,48 @@
package eu.dnetlib.data.collective.transformation.core.xsl;
import java.util.LinkedList;
import java.util.List;
/**
 * Base class of a simple string based XML element writer: an element name, a list of
 * namespace declarations, a list of already rendered attributes and the text of the
 * enclosed elements. Nothing is escaped or validated while rendering.
 *
 * @author jochen
 */
public abstract class AbstractXslElement {

    /** qualified name written as the element name */
    private String functionName;
    /** rendered attributes, written one after another without a separator */
    protected List<String> attrList = new LinkedList<String>();
    /** rendered content placed between the start tag and the end tag */
    protected StringBuilder enclosedElements = new StringBuilder();
    /** namespace declarations, each followed by a blank when written */
    protected List<String> nsList = new LinkedList<String>();

    /**
     * @param aFunctioName the qualified element name to render
     */
    public AbstractXslElement(String aFunctioName) {
        this.functionName = aFunctioName;
    }

    /**
     * Renders this element as XML text.
     *
     * @param isEmpty {@code true} to write a self closing tag and leave out the enclosed
     *        elements, {@code false} to write start tag, enclosed elements and end tag
     * @return the rendered element
     */
    public String asXml(boolean isEmpty) {
        StringBuilder xml = new StringBuilder();
        xml.append("<").append(functionName).append(" ");
        for (String namespaceDeclaration : nsList) {
            xml.append(namespaceDeclaration).append(" ");
        }
        for (String renderedAttribute : attrList) {
            xml.append(renderedAttribute);
        }
        if (isEmpty) {
            return xml.append("/>").toString();
        }
        xml.append(">").append(enclosedElements.toString());
        xml.append("</").append(functionName).append(">");
        return xml.toString();
    }

    /**
     * Renders this element with start tag, enclosed elements and end tag.
     *
     * @return the rendered element
     */
    public String asXml() {
        return asXml(false);
    }
}

View File

@ -0,0 +1,406 @@
package eu.dnetlib.data.collective.transformation.core.xsl;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import eu.dnetlib.data.collective.transformation.rulelanguage.Rules;
/**
* @author jochen
*
*/
public class XslConstructor {
private static final Log log = LogFactory.getLog(XslConstructor.class);
/**
 * Renders a variable rule as an {@code xsl:param} element. The parameter name is the
 * rule's unique name without its first character; the {@code select} attribute is
 * taken from the rule's xpath, its function call or its constant, in that order of
 * precedence. No {@code select} attribute is written if none of them is present.
 *
 * @param rule the variable rule to render
 * @return the rendered {@code xsl:param} element
 */
public String writeOutVariableRule(Rules rule) {
    XslElement param = new XslElement(XsltConstants.param);
    param.addAttribute("name", rule.getUniqueName().substring(1));

    if (rule.getXpath().length() > 0) {
        param.addAttribute("select", rule.getXpath());
    } else if (rule.getFunctionCall() != null) {
        // TODO - set the value if rule is static
        param.addAttribute("select", rule.getFunctionCall().doPreprocess()
                ? rule.getFunctionCall().getXSLpreparatedFunctionCall()
                : rule.getFunctionCall().getXSLdirectFunctionCall(null));
    } else if (rule.getConstant().length() > 0) {
        param.addAttribute("select", rule.getConstant());
    }
    return param.asXml();
}
/**
 * Renders a conditional rule with complex targets as {@code xsl:choose}: the primary
 * rule goes into {@code xsl:when} tested with the condition expression, the secondary
 * rule into {@code xsl:otherwise}. Both rules are written with
 * {@link #writeOutRuleComplex(Rules, String)} under their unique names.
 *
 * @param rule a rule carrying the condition
 * @return the rendered {@code xsl:choose} element
 */
public String writeOutConditionalChooseComplex(Rules rule) {
    log.debug("XslConstructor: conditionExpresssion: " + rule.getCondition().getConditionExpression());

    XslElement whenBranch = new XslElement(XsltConstants.when);
    whenBranch.addAttribute("test", rule.getCondition().getConditionExpression());
    Rules primaryRule = rule.getCondition().getPrimaryRule();
    whenBranch.addEnclosedElements(writeOutRuleComplex(primaryRule, primaryRule.getUniqueName()));

    XslElement otherwiseBranch = new XslElement(XsltConstants.otherwise);
    Rules secondaryRule = rule.getCondition().getSecondaryRule();
    otherwiseBranch.addEnclosedElements(writeOutRuleComplex(secondaryRule, secondaryRule.getUniqueName()));

    XslElement chooseElement = new XslElement(XsltConstants.choose);
    chooseElement.addEnclosedElements(whenBranch.asXml());
    chooseElement.addEnclosedElements(otherwiseBranch.asXml());
    return chooseElement.asXml();
}
/**
 * Renders a conditional rule with a complex target as {@code xsl:if}. The test is the
 * condition expression if the given rule is the primary rule of its condition,
 * otherwise the negated condition expression.
 *
 * @param rule a rule carrying the condition
 * @return the rendered {@code xsl:if} element
 */
public String writeOutConditionalIfComplex(Rules rule) {
    XslElement ifCondition = new XslElement(XsltConstants.ifCondition);

    boolean givenRuleIsPrimary = rule.getCondition().isPrimary(rule);
    String expression = rule.getCondition().getConditionExpression();
    ifCondition.addAttribute("test", givenRuleIsPrimary ? expression : "not(" + expression + ")");

    // NOTE(review): the primary rule is written for the negated test as well, exactly as
    // before; writeOutApplyConditionalTemplateIf writes the secondary rule in that case -
    // confirm which of the two is intended.
    Rules primaryRule = rule.getCondition().getPrimaryRule();
    ifCondition.addEnclosedElements(writeOutRuleComplex(primaryRule, primaryRule.getUniqueName()));
    return ifCondition.asXml();
}
/**
 * Renders a rule whose target element may carry attributes defined by a set of
 * pending attribute rules. The attribute rules are rendered first, then the value of
 * the rule itself is added from its xpath or its assignment variable.
 *
 * @param rule the rule to render
 * @param targetFieldName the name of the element to create
 * @return the target element, wrapped in an xsl:for-each over the rule's xpath if the rule has one
 * @throws IllegalStateException if the rule has neither xpath nor assignment variable but a constant
 */
public String writeOutRuleComplex(Rules rule, String targetFieldName){
// TODO a lot
XslElement targetField = new XslElement(targetFieldName);
if (rule.hasSet()){
// each pending rule of the set contributes one attribute of the target element
for (Rules attrRule: rule.getRulesSet().getPendingRules()){
if (attrRule.getConstant().length() > 0){
// a constant becomes a literal attribute on the target element itself
targetField.addAttribute(attrRule.getAttribute(), attrRule.getConstant()); // TODO: check for value type (constant, function, xpath etc.)
}else if (attrRule.getAssignmentVariable().length() > 0){
// a variable is written as xsl:attribute whose value is selected from the variable
XslElement attr = new XslElement(XsltConstants.attribute);
attr.addAttribute("name", attrRule.getAttribute());
XslElement valueOf = new XslElement(XsltConstants.valueOf, "select", attrRule.getAssignmentVariable());
attr.addEnclosedElements(valueOf.asXml());
targetField.addEnclosedElements(attr.asXml());
}else if (attrRule.getXpath().length() > 0){
// an xpath is written as xsl:attribute whose value is selected by the xpath
XslElement attr = new XslElement(XsltConstants.attribute);
attr.addAttribute("name", attrRule.getAttribute());
XslElement valueOf = new XslElement(XsltConstants.valueOf, "select", attrRule.getXpath());
attr.addEnclosedElements(valueOf.asXml());
targetField.addEnclosedElements(attr.asXml());
}else{
// anything else is treated as a function call
// NOTE(review): getFunctionCall() is dereferenced without a null check here - confirm it cannot be null in this branch
XslElement valueOf;
log.debug("name of the rule in writeOutRuleComplex: " + rule.getUniqueName() + " " + attrRule.getAttribute());
if (attrRule.getFunctionCall().doPreprocess()){
valueOf = new XslElement(XsltConstants.valueOf, "select", attrRule.getFunctionCall().getXSLpreparatedFunctionCall());
}else{
valueOf = new XslElement(XsltConstants.valueOf, "select", attrRule.getFunctionCall().getXSLdirectFunctionCall(null));
}
XslElement attr = new XslElement(XsltConstants.attribute);
attr.addAttribute("name", attrRule.getAttribute());
attr.addEnclosedElements(valueOf.asXml());
targetField.addEnclosedElements(attr.asXml());
}
}
log.debug(targetField.asXml());
}
log.debug("unique name of rule: " + rule.getUniqueName());
if (rule.getXpath().length() > 0){
// TODO for-each
// the target element, holding the value of the context node, is repeated for every node selected by the xpath
XslElement valueOf = new XslElement(XsltConstants.valueOf, "select", ".");
XslElement forEach = new XslElement(XsltConstants.forEach);
forEach.addAttribute("select", rule.getXpath());
targetField.addEnclosedElements(valueOf.asXml());
forEach.addEnclosedElements(targetField.asXml());
return forEach.asXml();
}else if (rule.getAssignmentVariable().length() > 0){
// the value of the target element is selected from the assignment variable
log.debug("assignmentVar: " + rule.getAssignmentVariable());
XslElement valueOf = new XslElement(XsltConstants.valueOf, "select", rule.getAssignmentVariable());
targetField.addEnclosedElements(valueOf.asXml());
log.debug(targetField.asXml());
}else if (rule.getConstant().length() > 0){
throw new IllegalStateException("complex rule with Constant Assignment not yet implemented");
}
// reached with an assignment variable, or with a rule that contributes no value of its own
return targetField.asXml();
}
/**
 * Renders a copy rule: the target element encloses an empty {@code xsl:copy-of} whose
 * {@code select} is the rule property {@code copySelectExpression}.
 *
 * @param rule the rule providing the property
 * @param targetFieldName the name of the element to create
 * @return the rendered target element
 */
public String writeOutRuleCopy(Rules rule, String targetFieldName) {
    String selectExpression = rule.getProperties().getProperty("copySelectExpression");
    XslElement copyOf = new XslElement(XsltConstants.copyOf, "select", selectExpression);

    XslElement targetField = new XslElement(targetFieldName);
    targetField.addEnclosedElements(copyOf.asXml(true));
    return targetField.asXml();
}
/**
 * Renders a simple rule as the target element holding the rule's value. The kind of
 * value is decided in this order: xpath, constant, assignment variable, empty rule,
 * skip rule, function call.
 *
 * @param rule the rule to render
 * @param targetFieldName the name of the element to create
 * @return the rendered XSLT fragment
 */
public String writeOutRule(Rules rule, String targetFieldName) {
    if (rule.getXpath().length() > 0) {
        // TODO consider the namespace
        // TODO consider the number of occurrences of the source element - limited or unlimited, currently unlimited
        XslElement contextValue = new XslElement(XsltConstants.valueOf, "select", ".");
        XslElement forEach = new XslElement(XsltConstants.forEach);
        forEach.addAttribute("select", rule.getXpath());
        forEach.addEnclosedElements(encloseInTargetField(targetFieldName, contextValue.asXml()));
        return forEach.asXml();
    }

    if (rule.getConstant().length() > 0) {
        // TODO case-distinction needed
        // (1) it's a constant
        // (2) it's an external function call
        // (2.1) the external function call's argument may contain an xpath-expression, which has to be determined
        // do not create 'for-each', just process the rule-function
        XslElement text = new XslElement(XsltConstants.text);
        text.addEnclosedElements(rule.getConstant());
        return encloseInTargetField(targetFieldName, text.asXml());
    }

    if (rule.getAssignmentVariable().length() > 0) {
        // hardcoded workaround
        XslElement variableValue = new XslElement(XsltConstants.valueOf, "select", rule.getAssignmentVariable());
        return encloseInTargetField(targetFieldName, variableValue.asXml());
    }

    if (rule.isEmpty()) {
        return new XslElement(targetFieldName).asXml();
    }

    if (rule.isSkip()) {
        // TODO
        return new XslElement(XsltConstants.callTemplate, "name", "terminate").asXml();
    }

    // everything else is a function call
    XslElement functionValue = new XslElement(XsltConstants.valueOf, "select",
            rule.getFunctionCall().doPreprocess()
                    ? rule.getFunctionCall().getXSLpreparatedFunctionCall()
                    : rule.getFunctionCall().getXSLdirectFunctionCall(null));
    return encloseInTargetField(targetFieldName, functionValue.asXml());
}

/** Renders an element named targetFieldName that encloses the given content. */
private String encloseInTargetField(String targetFieldName, String content) {
    XslElement targetField = new XslElement(targetFieldName);
    targetField.addEnclosedElements(content);
    return targetField.asXml();
}
/**
 * Renders a conditional rule as {@code xsl:choose}: the primary rule goes into
 * {@code xsl:when} tested with the condition expression, the secondary rule into
 * {@code xsl:otherwise}. Both rules are written with {@link #writeOutRule(Rules, String)}
 * under their unique names.
 *
 * @param rule a rule carrying the condition
 * @return the rendered {@code xsl:choose} element
 */
public String writeOutConditionalChoose(Rules rule) {
    XslElement whenBranch = new XslElement(XsltConstants.when);
    whenBranch.addAttribute("test", rule.getCondition().getConditionExpression());
    Rules primaryRule = rule.getCondition().getPrimaryRule();
    whenBranch.addEnclosedElements(writeOutRule(primaryRule, primaryRule.getUniqueName()));

    XslElement otherwiseBranch = new XslElement(XsltConstants.otherwise);
    Rules secondaryRule = rule.getCondition().getSecondaryRule();
    otherwiseBranch.addEnclosedElements(writeOutRule(secondaryRule, secondaryRule.getUniqueName()));

    XslElement chooseElement = new XslElement(XsltConstants.choose);
    chooseElement.addEnclosedElements(whenBranch.asXml());
    chooseElement.addEnclosedElements(otherwiseBranch.asXml());
    return chooseElement.asXml();
}
/**
 * Renders the given rule inside an {@code xsl:if}. The test is the condition
 * expression if the rule is the primary rule of its condition, otherwise the negated
 * condition expression. The rule itself is written under its target field name.
 *
 * @param rule a rule carrying the condition
 * @return the rendered {@code xsl:if} element
 */
public String writeOutConditionalIf(Rules rule) {
    XslElement ifCondition = new XslElement(XsltConstants.ifCondition);

    boolean givenRuleIsPrimary = rule.getCondition().isPrimary(rule);
    String expression = rule.getCondition().getConditionExpression();
    ifCondition.addAttribute("test", givenRuleIsPrimary ? expression : "not(" + expression + ")");

    ifCondition.addEnclosedElements(writeOutRule(rule, rule.getTargetField()));
    return ifCondition.asXml();
}
/**
 * Renders an empty {@code xsl:apply-templates} element.
 *
 * @param selectValue the value of the {@code select} attribute
 * @return the rendered self closing element
 */
public String writeOutApplyTemplates(String selectValue) {
    return new XslElement(XsltConstants.applyTemplates, "select", selectValue).asXml(true);
}
/**
 * Renders an {@code xsl:call-template} element without content.
 *
 * @param aTemplateName the value of the {@code name} attribute
 * @return the rendered element, written with start tag and end tag
 */
public String writeOutCallTemplate(String aTemplateName) {
    return new XslElement(XsltConstants.callTemplate, "name", aTemplateName).asXml();
}
/**
 * Renders a conditional rule as an {@code xsl:for-each} over the condition's apply
 * expression that encloses an {@code xsl:choose}: the primary rule is written in
 * {@code xsl:when}, tested with the condition expression, and the secondary rule in
 * {@code xsl:otherwise}.
 *
 * @param rule a rule carrying the condition
 * @return the rendered {@code xsl:for-each} element
 */
public String writeOutApplyConditionalTemplateChoose(Rules rule) {
    XslElement forEach = new XslElement(XsltConstants.forEach);
    forEach.addAttribute("select", rule.getCondition().getApplyExpression());

    XslElement when = new XslElement(XsltConstants.when);
    when.addAttribute("test", rule.getCondition().getConditionExpression());
    when.addEnclosedElements(writeOutChooseBranch(rule.getCondition().getPrimaryRule()));

    XslElement otherwise = new XslElement(XsltConstants.otherwise);
    otherwise.addEnclosedElements(writeOutChooseBranch(rule.getCondition().getSecondaryRule()));

    XslElement choose = new XslElement(XsltConstants.choose);
    choose.addEnclosedElements(when.asXml());
    choose.addEnclosedElements(otherwise.asXml());

    // the choose element used to be built but never attached, so that an empty
    // for-each was returned
    forEach.addEnclosedElements(choose.asXml());
    return forEach.asXml();
}

/** Renders the target field of a branch rule with its value taken from the function call or the xpath. */
private String writeOutChooseBranch(Rules branchRule) {
    XslElement targetField = new XslElement(branchRule.getTargetField());
    // TODO check type of rule -> xpath, constant, function
    String select;
    if (branchRule.getFunctionCall() == null) {
        select = branchRule.getXpath();
    } else if (branchRule.getFunctionCall().doPreprocess()) {
        select = branchRule.getFunctionCall().getXSLpreparatedFunctionCall();
    } else {
        select = branchRule.getFunctionCall().getXSLdirectFunctionCall(null);
    }
    targetField.addEnclosedElements(new XslElement(XsltConstants.valueOf, "select", select).asXml());
    return targetField.asXml();
}
/**
 * Renders a conditional rule as an {@code xsl:for-each} over the condition's apply
 * expression. Inside, the current position is stored in the variable {@code posVar}
 * and an {@code xsl:if} writes the primary rule under the condition expression, or
 * the secondary rule under the negated condition expression, depending on whether the
 * given rule is the primary rule of its condition.
 *
 * @param rule a rule carrying the condition
 * @param isComplexStructure {@code true} to write the selected rule with
 *        {@link #writeOutRuleComplex(Rules, String)} under its unique name, {@code false}
 *        to write its target field with a single value
 * @return the rendered {@code xsl:for-each} element
 */
public String writeOutApplyConditionalTemplateIf(Rules rule, boolean isComplexStructure) {
    // TODO check primary, alternative rule -> if, choose
    XslElement forEach = new XslElement(XsltConstants.forEach);
    forEach.addAttribute("select", rule.getCondition().getApplyExpression());

    // store position in variable
    XslElement positionVariable = new XslElement(XsltConstants.variable);
    positionVariable.addAttribute("name", "posVar");
    positionVariable.addAttribute("select", "position()");
    forEach.addEnclosedElements(positionVariable.asXml());

    // pick test and rule of the branch that applies to the given rule
    XslElement ifCondition = new XslElement(XsltConstants.ifCondition);
    boolean givenRuleIsPrimary = rule.getCondition().isPrimary(rule);
    String expression = rule.getCondition().getConditionExpression();
    ifCondition.addAttribute("test", givenRuleIsPrimary ? expression : "not(" + expression + ")");
    Rules branchRule = givenRuleIsPrimary
            ? rule.getCondition().getPrimaryRule()
            : rule.getCondition().getSecondaryRule();

    if (isComplexStructure) {
        ifCondition.addEnclosedElements(writeOutRuleComplex(branchRule, branchRule.getUniqueName()));
    } else {
        XslElement targetField = new XslElement(branchRule.getTargetField());
        // TODO check type of rule -> xpath, constant, function
        String select;
        if (branchRule.getFunctionCall() == null) {
            select = branchRule.getXpath();
        } else if (branchRule.getFunctionCall().doPreprocess()) {
            select = branchRule.getFunctionCall().getXSLpositionFunctionCall();
        } else {
            select = branchRule.getFunctionCall().getXSLdirectFunctionCall(null);
        }
        targetField.addEnclosedElements(new XslElement(XsltConstants.valueOf, "select", select).asXml());
        ifCondition.addEnclosedElements(targetField.asXml());
    }

    forEach.addEnclosedElements(ifCondition.asXml());
    return forEach.asXml();
}
/**
 * Builds a named {@code xsl:template} that declares a parameter
 * {@code <templateName>param}, selected by the rule's direct function call, and
 * creates one element per item of that parameter. The element name is the function
 * call parameter {@code elementName}; if it contains a colon, the part before the
 * colon is resolved to a namespace through the given declarations and the part after
 * it is used as element name.
 *
 * @param rule the rule providing the function call
 * @param templateName the name of the template to create
 * @param nsDeclarations namespace URIs keyed by prefix
 * @return the template element (not rendered yet)
 */
public XslElement writeOutRecursiveTemplate(Rules rule, String templateName, Map<String, String> nsDeclarations) {
    String parameterName = templateName + "param";

    XslElement param = new XslElement(XsltConstants.param);
    param.addAttribute("name", parameterName);
    param.addAttribute("select", rule.getFunctionCall().getXSLdirectFunctionCall(templateName)); // TODO functionCall

    // split ns element name into element name and namespace
    String qualifiedName = rule.getFunctionCall().getParameters().get("elementName");
    String localName = qualifiedName;
    String namespace = "";
    if (qualifiedName.contains(":")) {
        String[] nameParts = qualifiedName.split(":");
        localName = nameParts[1];
        // NOTE(review): a prefix missing from nsDeclarations ends up as the literal
        // namespace "null" in the output - confirm whether this should fail instead
        namespace = nsDeclarations.get(nameParts[0]);
    }

    XslElement element = new XslElement(XsltConstants.element);
    element.addAttribute("name", localName);
    element.addAttribute("namespace", namespace);
    element.addAttribute("inherit-namespaces", "yes");
    element.addEnclosedElements(new XslElement(XsltConstants.valueOf, "select", ".").asXml());

    XslElement forEach = new XslElement(XsltConstants.forEach);
    forEach.addAttribute("select", "$" + parameterName);
    forEach.addEnclosedElements(element.asXml());

    XslElement subTemplate = new XslElement(XsltConstants.template, "name", templateName);
    subTemplate.addEnclosedElements(param.asXml());
    subTemplate.addEnclosedElements(forEach.asXml());
    return subTemplate;
}
/**
 * Renders an {@code xsl:copy-of} element that selects the attributes of the context
 * node and every element with the local name {@code about}.
 *
 * @return the rendered element, written with start tag and end tag
 */
public String writeOutApplyAbout() {
    return new XslElement(XsltConstants.copyOf, "select", "@*|//*[local-name()='about']").asXml();
}
}

View File

@ -0,0 +1,43 @@
package eu.dnetlib.data.collective.transformation.core.xsl;
import java.util.Collection;
/**
 * Concrete element writer that offers methods to add attributes, enclosed content
 * and namespace declarations. Names and values are written as given, without any
 * escaping.
 *
 * @author jochen
 */
public class XslElement extends AbstractXslElement {

    /**
     * @param aFunctionName the qualified element name to render
     */
    public XslElement(String aFunctionName) {
        super(aFunctionName);
    }

    /**
     * Creates an element that already carries one attribute.
     *
     * @param aFunctionName the qualified element name to render
     * @param aAttrName the name of the attribute
     * @param aAttrValue the value of the attribute
     */
    public XslElement(String aFunctionName, String aAttrName, String aAttrValue) {
        this(aFunctionName);
        addAttribute(aAttrName, aAttrValue);
    }

    /**
     * Appends an attribute in the form {@code name="value"} followed by a blank.
     *
     * @param aName the name of the attribute
     * @param aValue the value of the attribute
     */
    public void addAttribute(String aName, String aValue) {
        String renderedAttribute = aName + "=\"" + aValue + "\" ";
        attrList.add(renderedAttribute);
    }

    /**
     * Appends already rendered content to the content of this element.
     *
     * @param aElements the text to append
     */
    public void addEnclosedElements(String aElements) {
        enclosedElements.append(aElements);
    }

    /**
     * Appends one entry to the list of namespace declarations.
     *
     * @param aNamespace the entry to add, written as given
     */
    public void addBoundPrefix(String aNamespace) {
        nsList.add(aNamespace);
    }

    /**
     * Appends all given entries to the list of namespace declarations.
     *
     * @param aNamespaceList the entries to add, written as given
     */
    public void addAllBoundPrefixes(Collection<String> aNamespaceList) {
        nsList.addAll(aNamespaceList);
    }

    /**
     * @return {@code true} if no enclosed content has been added so far
     */
    public boolean isEmpty() {
        return enclosedElements.length() == 0;
    }
}

View File

@ -0,0 +1,32 @@
package eu.dnetlib.data.collective.transformation.core.xsl;
/**
 * String constants used when writing XSLT: the qualified names of the XSLT
 * instruction elements, the declaration of the xsl namespace and the name used for
 * the extension functions.
 *
 * @author js
 */
public class XsltConstants {
// qualified names of XSLT elements, all written with the prefix "xsl"
public static final String applyTemplates = "xsl:apply-templates";
public static final String attribute = "xsl:attribute";
public static final String callTemplate = "xsl:call-template";
public static final String choose = "xsl:choose";
public static final String copy = "xsl:copy";
public static final String copyOf = "xsl:copy-of";
public static final String element = "xsl:element";
public static final String forEach = "xsl:for-each";
public static final String ifCondition = "xsl:if";
public static final String message = "xsl:message";
public static final String otherwise = "xsl:otherwise";
public static final String param = "xsl:param";
public static final String template = "xsl:template";
public static final String text = "xsl:text";
public static final String valueOf = "xsl:value-of";
public static final String variable = "xsl:variable";
public static final String when = "xsl:when";
public static final String withParam = "xsl:with-param";
// complete namespace declaration that binds the prefix "xsl" to the XSLT namespace
public static final String nsXsl = "xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\"";
// NOTE(review): presumably the name under which the external transformation functions are bound - confirm against the stylesheet builder
public static final String extFuncNS = "TransformationFunction";
}

View File

@ -0,0 +1,345 @@
package eu.dnetlib.data.collective.transformation.core.xsl.ext;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import javax.xml.XMLConstants;
import javax.xml.namespace.NamespaceContext;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.Result;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathFactory;
import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.svenson.JSONParser;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
import org.xml.sax.InputSource;
import eu.dnetlib.data.collective.transformation.engine.FunctionResults;
import eu.dnetlib.data.collective.transformation.engine.functions.Convert;
import eu.dnetlib.data.collective.transformation.engine.functions.IdentifierExtract;
import eu.dnetlib.data.collective.transformation.engine.functions.LookupRecord;
import eu.dnetlib.data.collective.transformation.engine.functions.ProcessingException;
import eu.dnetlib.data.collective.transformation.engine.functions.RegularExpression;
import eu.dnetlib.data.collective.transformation.engine.functions.Split;
/**
 * Implements the external functions that XSLT stylesheets call during a
 * transformation.
 * <p>
 * The shared instance is obtained through {@link #getInstance()}. The
 * {@link Convert} function and the {@link LookupRecord} are not created here;
 * they have to be supplied through their setters before the methods that use
 * them are called.
 *
 * @author jochen
 */
public class TransformationFunctionProxy {

    private static final Log log = LogFactory.getLog(TransformationFunctionProxy.class);

    /** The shared instance; only assigned after the static XML helpers below were created successfully. */
    private static TransformationFunctionProxy tf;
    private static DocumentBuilder docBuilder;
    private static Transformer transformer;
    private static final XPath xpath = XPathFactory.newInstance().newXPath();

    private final RegularExpression regExprFunction = new RegularExpression();
    private Convert convertFunction;
    private final IdentifierExtract identifierExtractFunction = new IdentifierExtract();
    private final Split split = new Split();
    private final Map<String, FunctionResults> mapOfResults = new HashMap<String, FunctionResults>();
    private LookupRecord lookupRecord;

    /**
     * Returns the shared instance, creating it together with the static
     * document builder, transformer and XPath namespace context on first use.
     * <p>
     * The instance is published only after the initialisation succeeded, so a
     * failed attempt is retried by the next call instead of handing out a
     * proxy whose helpers are missing.
     *
     * @return the transformationFunctionProxy instance
     * @throws IllegalStateException if the XML helpers cannot be created
     */
    public static synchronized TransformationFunctionProxy getInstance(){
        if (tf != null){
            return tf;
        }
        TransformationFunctionProxy instance = new TransformationFunctionProxy();
        try {
            DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
            dbf.setNamespaceAware(true);
            DocumentBuilder builder = dbf.newDocumentBuilder();
            Transformer identityTransformer = TransformerFactory.newInstance().newTransformer();
            xpath.setNamespaceContext(new NamespaceContext() {
                @Override
                public Iterator getPrefixes(String namespaceURI) {
                    // reverse lookup is not implemented
                    return null;
                }
                @Override
                public String getPrefix(String namespaceURI) {
                    // reverse lookup is not implemented
                    return null;
                }
                @Override
                public String getNamespaceURI(String aPrefix) {
                    if (aPrefix == null){
                        throw new IllegalArgumentException("No prefix provided!");
                    }else if (aPrefix.equals(XMLConstants.DEFAULT_NS_PREFIX)){
                        return "http://namespace.openaire.eu";
                    }else if (aPrefix.equals("dc")){
                        return "http://purl.org/dc/elements/1.1/";
                    }else{
                        return XMLConstants.NULL_NS_URI;
                    }
                }
            });
            docBuilder = builder;
            transformer = identityTransformer;
        } catch (Exception e) {
            log.fatal("error while instantiating DocumentBuilderFactory, Transfomer, Xpath.namespacecontext", e);
            throw new IllegalStateException(e);
        }
        tf = instance;
        return tf;
    }

    /**
     * Returns the first result stored at the given index of the results
     * registered under the given key.
     *
     * @param uniqueKey the key the results were registered with (see {@link #setResults(String, FunctionResults)})
     * @param i the index within the registered results
     * @return the result value
     * @throws IllegalStateException if no results are registered under the key
     */
    public String getValue(String uniqueKey, int i){
        return firstResult(uniqueKey, i);
    }

    /**
     * Returns the first result stored at the given index of the results
     * registered under the given key.
     *
     * @param uniqueKey the key the results were registered with
     * @param i the index within the registered results
     * @return the result value
     * @throws IllegalStateException if no results are registered under the key
     */
    public String extract(String uniqueKey, int i){
        // the result map is created with the instance and never replaced, so no null check is needed
        return firstResult(uniqueKey, i);
    }

    /**
     * Shared lookup behind {@link #getValue(String, int)} and {@link #extract(String, int)}.
     */
    private String firstResult(String uniqueKey, int i){
        if (!mapOfResults.containsKey(uniqueKey)){
            throw new IllegalStateException("unknown key: " + uniqueKey);
        }
        return mapOfResults.get(uniqueKey).get(i);
    }

    /**
     * normalize values given as an input value by using a vocabulary
     *
     * @param aInput - the value as a String
     * @param aVocabularyName - the name of the vocabulary, which must be known for the vocabulary registry
     * @return the conversion result
     * @throws IllegalStateException if the conversion fails
     */
    public synchronized String convertString(String aInput, String aVocabularyName){
        List<String> values = new LinkedList<String>();
        values.add(aInput);
        try {
            log.debug("conversion input: " + aInput);
            String conversionResult = convertFunction.executeSingleValue(aVocabularyName, values);
            log.debug("conversion result: " + conversionResult);
            return conversionResult;
        } catch (ProcessingException e) {
            log.fatal("convert failed for args 'input': " + aInput + " , 'vocabularyName': " + aVocabularyName, e);
            throw new IllegalStateException(e);
        }
    }

    /**
     * normalize values given as a NodeList by using a vocabulary
     *
     * @param aInput - the input values as NodeList
     * @param aVocabularyName - the name of the vocabulary, which must be known for the vocabulary registry
     * @return the conversion result
     * @throws IllegalStateException if the conversion fails
     */
    public synchronized String convert(NodeList aInput, String aVocabularyName){
        List<String> values = new LinkedList<String>();
        getTextFromNodeList(aInput, values);
        try {
            return convertFunction.executeSingleValue(aVocabularyName, values);
        } catch (ProcessingException e) {
            throw new IllegalStateException(e);
        }
    }

    /**
     * Converts the text values of a NodeList with
     * {@code executeFilterByParams} and returns the first result.
     *
     * @param aInput the input values as NodeList
     * @param aVocabularyName the name of the vocabulary
     * @param aDefaultPattern passed through to the convert function
     * @param aFunction passed through to the convert function
     * @return the first conversion result, or the empty string if there is none
     * @throws IllegalStateException if the conversion fails
     */
    public synchronized String convert(NodeList aInput, String aVocabularyName, String aDefaultPattern, String aFunction){
        List<String> values = new LinkedList<String>();
        getTextFromNodeList(aInput, values);
        try {
            List<String> results = convertFunction.executeFilterByParams(aVocabularyName, values, aDefaultPattern, aFunction);
            if (results.size() > 0){
                return results.get(0);
            }
            return "";
        } catch (ProcessingException e) {
            throw new IllegalStateException(e);
        }
    }

    /**
     * Collects the values of all text nodes of the list, descending
     * recursively into element nodes.
     *
     * @param aNodeList the nodes to read
     * @param aTextvalues receives the text values in document order
     */
    private void getTextFromNodeList(NodeList aNodeList, List<String> aTextvalues){
        for (int i = 0; i < aNodeList.getLength(); i++){
            Node n = aNodeList.item(i);
            if (n.getNodeType() == Node.ELEMENT_NODE){
                getTextFromNodeList(n.getChildNodes(), aTextvalues);
            }else if (n instanceof Text){
                aTextvalues.add(n.getNodeValue());
            }
        }
    }

    /**
     * substitutes using regular expression
     * <p>
     * The replacement is spliced into the expression in front of the text
     * found between its last two slashes before the expression is executed.
     *
     * @param aInput the value to apply the expression to
     * @param aReplacement the replacement text
     * @param aRegularExpression the slash-delimited expression
     * @return the result of the substitution
     * @throws IllegalStateException if the expression cannot be processed
     */
    public synchronized String regExpr(String aInput, String aReplacement, String aRegularExpression){
        try {
            int lastSlash = aRegularExpression.lastIndexOf("/");
            String trailingOptions = aRegularExpression.substring(lastSlash);
            int replacementSlash = aRegularExpression.substring(0, lastSlash).lastIndexOf("/");
            String replacementFromExpression = aRegularExpression.substring(replacementSlash + 1, lastSlash);
            String newRegExpr = aRegularExpression.substring(0, replacementSlash + 1) + aReplacement + replacementFromExpression + trailingOptions;
            return regExprFunction.executeSingleValue(newRegExpr, aInput, aReplacement);
        } catch (ProcessingException e) {
            throw new IllegalStateException(e);
        }
    }

    /**
     * Looks up a property value of a record in the configured {@link LookupRecord}.
     *
     * @param aIdentifier the record identifier
     * @param aPropertyKey the property key
     * @return the property value as returned by the lookup record
     */
    public String lookup(String aIdentifier, String aPropertyKey){
        log.debug("functionProxy.lookup: param identifier: " + aIdentifier + " , key: " + aPropertyKey);
        return this.lookupRecord.getPropertyValue(aIdentifier, aPropertyKey);
    }

    /**
     * Splits the text values of a NodeList using a regular expression.
     *
     * @param aInput the input values as NodeList
     * @param aRegularExpression the expression to split by
     * @param aCallId currently not used by this method
     * @return the values produced by the split function
     * @throws IllegalStateException if splitting fails
     */
    public synchronized Collection<String> split(NodeList aInput, String aRegularExpression, String aCallId){
        try {
            List<String> textValues = new LinkedList<String>();
            getTextFromNodeList(aInput, textValues);
            return split.executeAllValues(textValues, aRegularExpression);
        }catch (ProcessingException e){
            throw new IllegalStateException(e);
        }
    }

    /**
     * Returns a single value from the split function for the given call id.
     *
     * @param aCallId the call id
     * @return the value returned by the split function
     * @throws IllegalStateException if the split function fails
     */
    public synchronized String split(String aCallId){
        try {
            return split.executeSingleValue(aCallId);
        }catch (ProcessingException e){
            throw new IllegalStateException(e);
        }
    }

    /**
     * extract content that match pattern given by a regular expression from a given node
     *
     * @param aXpathExprJson XML-escaped JSON list of XPath expressions
     * @param aInput the node to extract from
     * @param aRegExpression XML-escaped regular expression
     * @return nodeList
     * @throws IllegalStateException if the extraction fails; the original exception is kept as cause
     */
    public synchronized NodeList identifierExtract(String aXpathExprJson, Node aInput, String aRegExpression){
        String xpathExprJson = StringEscapeUtils.unescapeXml(aXpathExprJson);
        log.debug("unescape xpathExprJson: " + xpathExprJson);
        String regExpression = StringEscapeUtils.unescapeXml(aRegExpression);
        log.debug("unescape regExpr" + regExpression);
        try{
            @SuppressWarnings("unchecked")
            List<String> xpathExprList = JSONParser.defaultJSONParser().parse(List.class, xpathExprJson);
            // workaround: the node is serialised and parsed again before it is handed to the extract function
            DOMSource s = new DOMSource(aInput);
            StringWriter w = new StringWriter();
            Result r = new StreamResult(w);
            transformer.transform(s, r);
            Document doc = docBuilder.parse(new InputSource(new StringReader(w.toString())));
            return identifierExtractFunction.extract(xpathExprList, doc, regExpression, docBuilder.newDocument(), xpath);
        }catch(Exception e){
            // do not let a missing input node hide the original failure
            String nodeValue = (aInput == null) ? null : aInput.getNodeValue();
            log.fatal("identifierExtract failed for node value: " + nodeValue, e);
            throw new IllegalStateException(e.getMessage(), e);
        }
    }

    /**
     * Registers the results of a function call under the given key.
     *
     * @param key the key used later by {@link #getValue(String, int)} and {@link #extract(String, int)}
     * @param resultsFunction_getvalue the results to register
     */
    public void setResults(String key, FunctionResults resultsFunction_getvalue) {
        mapOfResults.put(key, resultsFunction_getvalue);
    }

    /**
     * @param convertFunction the convertFunction to set
     */
    public void setConvertFunction(Convert convertFunction) {
        this.convertFunction = convertFunction;
    }

    /**
     * @return the convertFunction
     */
    public Convert getConvertFunction() {
        return convertFunction;
    }

    /**
     * @return the lookupRecord
     */
    public LookupRecord getLookupRecord() {
        return lookupRecord;
    }

    /**
     * @param lookupRecord the lookupRecord to set
     */
    public void setLookupRecord(LookupRecord lookupRecord) {
        this.lookupRecord = lookupRecord;
    }
}

View File

@ -0,0 +1,67 @@
package eu.dnetlib.data.collective.transformation.engine;
import java.util.Collection;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
/**
 * Holds the results of a function call as a sequence of entries, each entry
 * being a list of result values. Entries are numbered in the order they are
 * added, starting at 0.
 *
 * @author js
 */
public class FunctionResults {

    // keys are the decimal string form of the entry number
    private Map<String, List<String>> resultMap = new LinkedHashMap<String, List<String>>();

    /**
     * Returns the first value of the entry with the given number.
     *
     * @param aIndex the entry number
     * @return a result
     */
    public String get(int aIndex){
        final List<String> entry = resultMap.get(String.valueOf(aIndex));
        return entry.get(0);
    }

    /**
     * Returns the value at the given 1-based position of the entry with the
     * given number.
     *
     * @param aIndex the entry number
     * @param aPosition the position within the entry, starting at 1
     * @return a result
     * @throws IllegalArgumentException if the position is not positive
     */
    public String get(int aIndex, int aPosition){
        if (aPosition < 1){
            throw new IllegalArgumentException("position is " + aPosition + ", must be greater 0");
        }
        final List<String> entry = resultMap.get(String.valueOf(aIndex));
        return entry.get(aPosition - 1);
    }

    /**
     * Adds every element of the collection as an entry of its own.
     *
     * @param aCollection the results, one per entry
     */
    public void addAll(Collection<? extends String> aCollection){
        for (String singleResult : aCollection){
            add(singleResult);
        }
    }

    /**
     * Adds a new entry holding exactly the given value.
     *
     * @param aResult the result value
     */
    public void add(String aResult){
        final List<String> singleValueEntry = new LinkedList<String>();
        singleValueEntry.add(aResult);
        store(singleValueEntry);
    }

    /**
     * Adds the given list as a new entry; the list is stored as passed in, not copied.
     *
     * @param aResults the result values of the entry
     */
    public void add(List<String> aResults){
        store(aResults);
    }

    /**
     * Files the entry under the next free entry number.
     */
    private void store(List<String> anEntry){
        final String nextKey = String.valueOf(resultMap.size());
        resultMap.put(nextKey, anEntry);
    }
}

View File

@ -0,0 +1,240 @@
package eu.dnetlib.data.collective.transformation.engine;
import java.io.StringReader;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.DocumentHelper;
import org.dom4j.Node;
import org.dom4j.XPath;
import org.dom4j.io.SAXReader;
import eu.dnetlib.data.collective.transformation.core.xsl.ext.TransformationFunctionProxy;
import eu.dnetlib.data.collective.transformation.engine.functions.Convert;
import eu.dnetlib.data.collective.transformation.engine.functions.Extract;
import eu.dnetlib.data.collective.transformation.engine.functions.ProcessingException;
import eu.dnetlib.data.collective.transformation.engine.functions.RegularExpression;
import eu.dnetlib.data.collective.transformation.engine.functions.RetrieveValue;
import eu.dnetlib.data.collective.transformation.rulelanguage.IRule;
import eu.dnetlib.data.collective.transformation.rulelanguage.Rules;
import eu.dnetlib.data.collective.transformation.rulelanguage.util.FunctionCall;
/**
 * Evaluates a function call of the transformation rules against a list of
 * object records before the stylesheet runs, and registers the collected
 * results with the {@link TransformationFunctionProxy} under the uuid of the
 * function call.
 * <p>
 * The function objects and the proxy have to be supplied through the setters
 * before {@code preprocess} is called.
 *
 * @author jochen
 */
public class PreProcessor {

    @SuppressWarnings("unused")
    private static final Log log = LogFactory.getLog(PreProcessor.class);
    private Convert convertFunction;
    private Extract extractFunction;
    private RetrieveValue retrieveFunction;
    private RegularExpression regExprFunction;
    private TransformationFunctionProxy functionProxy;
    // NOTE(review): the reader runs with its default settings; if records can come from untrusted sources, consider disabling DTDs / external entities
    private SAXReader reader = new SAXReader();
    private Map<String, String> nsMap = new HashMap<String, String>();

    /**
     * pre-process output values from object records using a function call
     * <p>
     * Supported external functions are {@code extract}, {@code convert},
     * {@code getValue} and {@code regExpr}; any other name yields an empty
     * result set. The results are registered with the function proxy under
     * the uuid of the function call.
     *
     * @param aFunctionCall function call object
     * @param aObjectRecords list of object records
     * @param aNamespaceMap map of namespace prefixes and uris
     * @param aStaticResults cache of results of static function calls, keyed by uuid; read and updated here
     * @param aJobProperties job properties that may be referenced by regExpr arguments
     * @param aVarRules variable rules that may be referenced by the regExpr replacement argument
     * @throws IllegalStateException if a function fails or a record cannot be parsed
     */
    public void preprocess(
            FunctionCall aFunctionCall,
            List<String> aObjectRecords,
            Map<String, String> aNamespaceMap,
            Map<String, String> aStaticResults,
            Map<String, String> aJobProperties,
            Map<String, IRule> aVarRules){
        this.nsMap = aNamespaceMap;
        FunctionResults functionResults = new FunctionResults();
        try {
            if (aFunctionCall.getExternalFunctionName().equals("extract")){
                // extract works on the whole record list at once
                String featureName = aFunctionCall.getParameters().get(Extract.paramNameFeature);
                functionResults.addAll(extractFunction.execute(aObjectRecords, featureName));
            }else{
                for (String objRecord: aObjectRecords){
                    if (aFunctionCall.getExternalFunctionName().equals("convert")){
                        preprocessConvert(aFunctionCall, objRecord, aStaticResults, functionResults);
                    }else if (aFunctionCall.getExternalFunctionName().equals("getValue")){
                        preprocessGetValue(aFunctionCall, objRecord, aStaticResults, functionResults);
                    }else if (aFunctionCall.getExternalFunctionName().equals("regExpr")){
                        preprocessRegExpr(aFunctionCall, objRecord, aStaticResults, aJobProperties, aVarRules, functionResults);
                    }
                }
            }
        } catch (ProcessingException e) {
            throw new IllegalStateException(e);
        } catch (DocumentException e) {
            throw new IllegalStateException(e);
        }
        functionProxy.setResults(aFunctionCall.getUuid(), functionResults);
    }

    /**
     * Handles a {@code convert} call for one record.
     */
    private void preprocessConvert(
            FunctionCall aFunctionCall,
            String aRecord,
            Map<String, String> aStaticResults,
            FunctionResults aResults) throws ProcessingException, DocumentException {
        if (aFunctionCall.isStatic() && aStaticResults.containsKey(aFunctionCall.getUuid())){
            aResults.add(aStaticResults.get(aFunctionCall.getUuid()));
            return;
        }
        String vocabName = aFunctionCall.getParameters().get(Convert.paramVocabularyName);
        String fieldExpr = aFunctionCall.getParameters().get(Convert.paramFieldValue);
        List<String> recordValues = getValuesFromRecord(aRecord, fieldExpr);
        if (aFunctionCall.isStatic()){
            // a first-time static call only fills the cache; nothing is added to the per-record results
            aStaticResults.put(aFunctionCall.getUuid(), convertFunction.executeSingleValue(vocabName, recordValues));
        }else{
            aResults.add(convertFunction.executeAllValues(vocabName, recordValues));
        }
    }

    /**
     * Handles a {@code getValue} call for one record.
     */
    private void preprocessGetValue(
            FunctionCall aFunctionCall,
            String aRecord,
            Map<String, String> aStaticResults,
            FunctionResults aResults) throws ProcessingException, DocumentException {
        if (aFunctionCall.isStatic() && aStaticResults.containsKey(aFunctionCall.getUuid())){
            aResults.add(aStaticResults.get(aFunctionCall.getUuid()));
            return;
        }
        String functionName = aFunctionCall.getParameters().get(RetrieveValue.paramFunctionName);
        String result = retrieveFunction.executeSingleValue(functionName, aFunctionCall.getArguments(), aRecord, nsMap);
        aResults.add(result);
        if (aFunctionCall.isStatic()){
            aStaticResults.put(aFunctionCall.getUuid(), result);
        }
    }

    /**
     * Handles a {@code regExpr} call for one record.
     */
    private void preprocessRegExpr(
            FunctionCall aFunctionCall,
            String aRecord,
            Map<String, String> aStaticResults,
            Map<String, String> aJobProperties,
            Map<String, IRule> aVarRules,
            FunctionResults aResults) throws ProcessingException, DocumentException {
        if (aFunctionCall.isStatic() && aStaticResults.containsKey(aFunctionCall.getUuid())){
            // a cached static result exists: nothing is computed or added for this record
            return;
        }
        String regularExpression = aFunctionCall.getParameters().get(RegularExpression.paramRegularExpr);
        String expression1 = aFunctionCall.getParameters().get(RegularExpression.paramExpr1);
        // the first argument is either the name of a job property or an xpath expression on the record
        List<String> recordValues;
        if (aJobProperties.containsKey(expression1)){
            recordValues = new LinkedList<String>();
            recordValues.add(aJobProperties.get(expression1));
        }else{
            recordValues = getValuesFromRecord(aRecord, expression1);
        }
        String expression2 = aFunctionCall.getParameters().get(RegularExpression.paramExpr2);
        String replacement = resolveReplacement(expression2, aRecord, aJobProperties, aVarRules);

        String result = null;
        List<String> regExprResults = new LinkedList<String>();
        if (!recordValues.isEmpty()){
            // the rewritten expression is the same for every value, so build it once
            String newRegExpr = spliceReplacement(regularExpression, replacement);
            for (String fieldValue: recordValues){
                result = regExprFunction.executeSingleValue(newRegExpr, fieldValue, replacement);
                regExprResults.add(result);
            }
        }
        aResults.add(regExprResults);
        // assuming 1 result only: the last computed value (null if there was none) is cached
        if (aFunctionCall.isStatic()){
            aStaticResults.put(aFunctionCall.getUuid(), result);
        }
    }

    /**
     * Resolves the replacement argument of a regExpr call: a job property,
     * a constant variable rule, or the first value an xpath expression
     * selects from the record, in that order.
     */
    private String resolveReplacement(
            String aExpression,
            String aRecord,
            Map<String, String> aJobProperties,
            Map<String, IRule> aVarRules) throws DocumentException {
        if (aJobProperties.containsKey(aExpression)){
            return aJobProperties.get(aExpression);
        }
        if (aVarRules.containsKey(aExpression)){
            Rules varRule = (Rules)aVarRules.get(aExpression);
            return varRule.getConstant().replace("'", ""); // currently limited to constant rules.
        }
        List<String> candidates = getValuesFromRecord(aRecord, aExpression);
        if (candidates.isEmpty()){
            throw new IllegalStateException("regExpr: expression '" + aExpression + "' selects no value to use as replacement");
        }
        return candidates.get(0); // get the first available value
    }

    /**
     * Inserts the replacement in front of the text found between the last two
     * slashes of the regular expression.
     */
    private String spliceReplacement(String aRegularExpression, String aReplacement){
        int lastSlash = aRegularExpression.lastIndexOf("/");
        String trailingOptions = aRegularExpression.substring(lastSlash);
        int replacementSlash = aRegularExpression.substring(0, lastSlash).lastIndexOf("/");
        String replacementFromExpression = aRegularExpression.substring(replacementSlash + 1, lastSlash);
        return aRegularExpression.substring(0, replacementSlash + 1) + aReplacement + replacementFromExpression + trailingOptions;
    }

    /**
     * @param functionProxy the proxy the results are registered with
     */
    public void setFunctionProxy(TransformationFunctionProxy functionProxy) {
        this.functionProxy = functionProxy;
    }

    /**
     * @return the functionProxy
     */
    public TransformationFunctionProxy getFunctionProxy() {
        return functionProxy;
    }

    /**
     * @param convertFunction the convertFunction to set
     */
    public void setConvertFunction(Convert convertFunction) {
        this.convertFunction = convertFunction;
    }

    /**
     * @return the convertFunction
     */
    public Convert getConvertFunction() {
        return convertFunction;
    }

    /**
     * @param retrieveFunction the retrieveFunction to set
     */
    public void setRetrieveFunction(RetrieveValue retrieveFunction) {
        this.retrieveFunction = retrieveFunction;
    }

    /**
     * @return the retrieveFunction
     */
    public RetrieveValue getRetrieveFunction() {
        return retrieveFunction;
    }

    /**
     * @return the regExprFunction
     */
    public RegularExpression getRegExprFunction() {
        return regExprFunction;
    }

    /**
     * @param regExprFunction the regExprFunction to set
     */
    public void setRegExprFunction(RegularExpression regExprFunction) {
        this.regExprFunction = regExprFunction;
    }

    /**
     * @param extractFunction the extractFunction to set
     */
    public void setExtractFunction(Extract extractFunction) {
        this.extractFunction = extractFunction;
    }

    /**
     * @return the extractFunction
     */
    public Extract getExtractFunction() {
        return extractFunction;
    }

    /**
     * evaluate given XPath Expr applied on a record and return the values as a list of strings
     * <p>
     * String results are taken as they are, nodes contribute their text, and
     * numeric results are reduced to their integer part.
     *
     * @param record the record as XML text
     * @param xpathExpr the expression, evaluated with the current namespace map
     * @return list of strings
     * @throws DocumentException if the record cannot be parsed
     */
    @SuppressWarnings("unchecked")
    private List<String> getValuesFromRecord(String record, String xpathExpr) throws DocumentException{
        List<String> values = new LinkedList<String>();
        Document doc = reader.read(new StringReader(record));
        XPath xpath = DocumentHelper.createXPath(xpathExpr);
        xpath.setNamespaceURIs(nsMap);
        Object context = xpath.evaluate(doc);
        if (context instanceof String){
            values.add((String)context);
        }else if (context instanceof List){
            for (Node node: (List<Node>)context){
                values.add(node.getText());
            }
        }else if (context instanceof Node){
            values.add( ((Node)context).getText());
        }else if (context instanceof Number){
            values.add( ((Number)context).intValue() + "");
        }
        return values;
    }
}

View File

@ -0,0 +1,409 @@
package eu.dnetlib.data.collective.transformation.engine;
import java.io.StringReader;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import net.sf.saxon.instruct.TerminationException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.springframework.core.io.Resource;
import org.w3c.dom.Node;
import org.xml.sax.InputSource;
import eu.dnetlib.common.profile.ResourceDao;
import eu.dnetlib.data.collective.transformation.IDatabaseConnector;
import eu.dnetlib.data.collective.transformation.TransformationException;
import eu.dnetlib.data.collective.transformation.VocabularyRegistry;
import eu.dnetlib.data.collective.transformation.core.xsl.ext.TransformationFunctionProxy;
import eu.dnetlib.data.collective.transformation.engine.core.ITransformation;
import eu.dnetlib.data.collective.transformation.engine.functions.Convert;
// import eu.dnetlib.data.collective.transformation.engine.functions.Dblookup;
import eu.dnetlib.data.collective.transformation.engine.functions.Extract;
import eu.dnetlib.data.collective.transformation.engine.functions.IFeatureExtraction;
import eu.dnetlib.data.collective.transformation.engine.functions.ProcessingException;
import eu.dnetlib.data.collective.transformation.engine.functions.RegularExpression;
import eu.dnetlib.data.collective.transformation.engine.functions.RetrieveValue;
import eu.dnetlib.data.collective.transformation.engine.functions.RetrieveValue.FUNCTION;
import eu.dnetlib.data.collective.transformation.rulelanguage.Argument;
import eu.dnetlib.data.collective.transformation.rulelanguage.Argument.Type;
import eu.dnetlib.data.collective.transformation.rulelanguage.util.FunctionCall;
import eu.dnetlib.data.collective.transformation.utils.BlacklistConsumer;
/**
* @author jochen
*
*/
public class SimpleTransformationEngine {
private static Log log = LogFactory.getLog(SimpleTransformationEngine.class);
private ITransformation transformation;
private VocabularyRegistry vocabularyRegistry;
private IDatabaseConnector databaseConnector;
private ResourceDao resourceDao;
private IFeatureExtraction featureExtraction;
private final List<String> mdRecords = new LinkedList<String>();
private long totalTransformedRecords = 0;
private long totalIgnoredRecords = 0;
private String mappingFile;
private boolean stylesheetParamsCalculated = false;
private boolean preprocessingDone = false;
private Map<String, String> stylesheetParams = new LinkedHashMap<String, String>();
private Resource blacklistApi;
private List<String> blacklistedRecords = new LinkedList<String>();
/**
 * execute any preprocessings declared in the transformation script prior starting the transformation of records
 * <p>
 * Only the {@code blacklist} preprocessing is handled at present: the list of
 * blacklisted records is fetched from the blacklist API URL with the data
 * source id appended.
 *
 * @param dataSourceId the id appended to the blacklist API URL
 * @throws IllegalStateException if the blacklist cannot be retrieved; the original exception is kept as cause
 */
public void preprocess(String dataSourceId) {
    for (Map<String, String> preprocMap : this.transformation.getRuleLanguageParser().getPreprocessings()) {
        for (String function : preprocMap.keySet()) {
            // if (function.equals("dblookup")) {
            // Dblookup fun = new Dblookup();
            // fun.setDbConnector(databaseConnector);
            // try {
            // log.debug("preprocessingMap value: " + preprocMap.get(function));
            // TransformationFunctionProxy.getInstance().setLookupRecord(fun.getResults(preprocMap.get(function)));
            // } catch (Exception e) {
            // log.debug(e.getMessage());
            // throw new IllegalStateException(e);
            // }
            // }
            if (function.equals("blacklist")) {
                BlacklistConsumer bc = new BlacklistConsumer();
                try{
                    blacklistedRecords = bc.getBlackList(blacklistApi.getURL() + dataSourceId);
                }catch(Exception e){
                    throw new IllegalStateException("error in preprocess: " + e.getMessage(), e);
                }
            }
        }
    }
    log.debug("preprocessing done.");
}
/**
 * check if blacklistedRecords exist and if so check if the current record is blacklisted by its objIdentifier
 * <p>
 * The identifier is the string value of the first element with local name
 * {@code objIdentifier} in the record.
 *
 * @param aRecord the record as XML text
 * @return {@code true} if the record's objIdentifier is contained in the blacklist
 * @throws IllegalStateException if the record cannot be evaluated; the original exception is kept as cause
 */
private boolean isBlacklistRecord(String aRecord){
    // nothing is blacklisted: skip parsing the record altogether
    if (blacklistedRecords == null || blacklistedRecords.isEmpty()) return false;
    XPath xpath = XPathFactory.newInstance().newXPath();
    try{
        Node root = (Node) xpath.evaluate("/", new InputSource(new StringReader(aRecord)), XPathConstants.NODE);
        String objId = xpath.evaluate("//*[local-name()='objIdentifier']", root);
        return blacklistedRecords.contains(objId);
    }catch(Exception e){
        throw new IllegalStateException("error in isBlacklistRecord: " + e.getMessage(), e);
    }
}
/**
 * transforms a source record
 * <p>
 * On the first call the stylesheet parameters are calculated from the record
 * and the declared preprocessings are run. A blacklisted record, or a record
 * whose transformation is terminated by the stylesheet, is transformed with
 * {@code ITransformation.XSLSyntaxcheckfailed} instead.
 *
 * @param sourceRecord
 *            the record to transform
 * @return transformed record
 * @throws IllegalStateException if parameter calculation, preprocessing or the transformation fails
 */
public String transform(final String sourceRecord) {
    List<String> objectRecords = new LinkedList<String>();
    objectRecords.add(sourceRecord);
    int index = 0;
    mdRecords.clear();
    initTransformationFunction();

    if (!stylesheetParamsCalculated) {
        try{
            calculateStylesheetParams(sourceRecord);
        }catch(Exception e){
            // keep the cause so the failing lookup can be diagnosed
            throw new IllegalStateException("error in calculateStylesheetParams: " + e.getMessage(), e);
        }
    }
    if (!preprocessingDone){
        // xpath sourceRecord dataSourceid
        preprocess(stylesheetParams.get("varBlacklistDataSourceId"));
        preprocessingDone = true;
    }

    if (isBlacklistRecord(sourceRecord)){
        try{
            mdRecords.add(transformation.transformRecord(sourceRecord, ITransformation.XSLSyntaxcheckfailed));
        }catch(Exception e){
            log.fatal(sourceRecord);
            throw new IllegalStateException(e);
        }
    }else if (!transformation.getRuleLanguageParser().isXslStylesheet()) {
        // rule-language script: evaluate all function calls first, then transform by record index
        log.debug("functionCalls size: " + transformation.getRuleLanguageParser().getFunctionCalls().size());
        for (FunctionCall functionCall : transformation.getRuleLanguageParser().getFunctionCalls()) {
            preprocess(objectRecords, functionCall);
        }
        for (String record : objectRecords) {
            try {
                log.debug("now run transformation for record with index: " + index);
                try{
                    String transformedRecord = transformation.transformRecord(record, index);
                    mdRecords.add(transformedRecord);
                } catch (TerminationException e){
                    log.debug("record transformation terminated.");
                    String failedRecord = transformation.transformRecord(record, ITransformation.XSLSyntaxcheckfailed);
                    log.debug(failedRecord);
                    totalIgnoredRecords++;
                    mdRecords.add(failedRecord);
                }
            } catch (TransformationException e) {
                log.error(sourceRecord);
                throw new IllegalStateException(e);
            }
            index++;
        }
    } else {
        // plain XSL stylesheet: transform with the calculated stylesheet parameters
        for (String record : objectRecords) {
            try {
                log.debug("now run transformation for record with index: " + index);
                try{
                    String transformedRecord = transformation.transformRecord(record, stylesheetParams);
                    mdRecords.add(transformedRecord);
                }catch(TerminationException e){
                    String failedRecord = transformation.transformRecord(record, ITransformation.XSLSyntaxcheckfailed);
                    totalIgnoredRecords++;
                    log.debug(failedRecord);
                    mdRecords.add(failedRecord);
                }
            } catch (TransformationException e) {
                log.error(sourceRecord);
                throw new IllegalStateException(e);
            }
            index++;
        }
    }
    totalTransformedRecords = totalTransformedRecords + mdRecords.size();
    log.debug("objRecordSize: " + objectRecords.size() + ", mdRecordSize: " + mdRecords.size() + ", ignoredRecordSize: " + totalIgnoredRecords);
    return mdRecords.get(0);
}
/**
 * Calculates the stylesheet parameters from the repository profile that
 * belongs to the record's datasource prefix.
 * <p>
 * The profile is selected by an XQuery on the prefix; from it the official
 * name, the OpenAIRE data source id and the data source type are read. If a
 * {@code blacklist} preprocessing is declared, the value selected by its
 * xpath is stored as {@code varBlacklistDataSourceId}.
 *
 * @param aRecord the record as XML text
 * @throws XPathExpressionException if the record cannot be evaluated
 * @throws ProcessingException if a profile lookup fails
 */
private void calculateStylesheetParams(final String aRecord) throws XPathExpressionException, ProcessingException {
    // NOTE(review): the flag is raised before the lookups run, so a failure below is not retried for later records - confirm this is intended
    stylesheetParamsCalculated = true;
    XPath xpath = XPathFactory.newInstance().newXPath();
    Node root = (Node) xpath.evaluate("/", new InputSource(new StringReader(aRecord)), XPathConstants.NODE);
    String datasourcePrefix = xpath.evaluate("//*[local-name()='datasourceprefix']", root);
    // the prefix comes from the record: escape it for use inside a double-quoted XQuery string literal
    String prefixLiteral = datasourcePrefix.replace("&", "&amp;").replace("\"", "\"\"");
    String profileXquery = "collection('/db/DRIVER/RepositoryServiceResources')//RESOURCE_PROFILE[.//EXTRA_FIELDS/FIELD[key=\"NamespacePrefix\"][value=\"" + prefixLiteral + "\"]]";
    log.debug("profileXquery: " + profileXquery);
    // rule language equivalent:
    // static $varDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId",
    // xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
    RetrieveValue retrieveValue = new RetrieveValue();
    retrieveValue.setResourceDao(resourceDao);
    Argument profileQuery = new Argument(Type.VALUE, profileXquery);

    stylesheetParams.put("varOfficialName",
            retrieveProfileField(retrieveValue, profileQuery, "//OFFICIAL_NAME"));
    stylesheetParams.put("varDataSourceId",
            retrieveProfileField(retrieveValue, profileQuery, "//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"));
    stylesheetParams.put("varDsType",
            retrieveProfileField(retrieveValue, profileQuery, "//CONFIGURATION/DATASOURCE_TYPE"));

    // if blacklist: the preprocessing value is the xpath of the blacklist data source id
    for (Map<String, String> preprocMap : this.transformation.getRuleLanguageParser().getPreprocessings()) {
        for (String function : preprocMap.keySet()) {
            if (function.equals("blacklist")) {
                stylesheetParams.put("varBlacklistDataSourceId",
                        retrieveProfileField(retrieveValue, profileQuery, preprocMap.get(function)));
            }
        }
    }
}

/**
 * Reads one field of the profile selected by the query argument, using the PROFILEFIELD function.
 */
private String retrieveProfileField(final RetrieveValue aRetrieveValue, final Argument aProfileQuery, final String aFieldXpath) throws ProcessingException {
    List<Argument> argList = new LinkedList<Argument>();
    argList.add(aProfileQuery);
    argList.add(new Argument(Type.INPUTFIELD, aFieldXpath));
    return aRetrieveValue.executeSingleValue(FUNCTION.PROFILEFIELD.toString(), argList, null, new HashMap<String, String>());
}
/**
 * Creates a convert function backed by the vocabulary registry and hands it
 * to the shared function proxy.
 *
 * @throws IllegalStateException if no vocabulary registry has been set
 */
private void initTransformationFunction() {
    if (null == this.vocabularyRegistry) {
        throw new IllegalStateException("vocabularyReg is null");
    }
    final Convert vocabularyConverter = new Convert();
    vocabularyConverter.setVocabularyRegistry(this.vocabularyRegistry);
    final TransformationFunctionProxy proxy = TransformationFunctionProxy.getInstance();
    proxy.setConvertFunction(vocabularyConverter);
}
/**
 * Evaluates a function call against the object records if the call asks for
 * preprocessing or is static; otherwise the call is skipped.
 *
 * @param records
 *            list of object records
 * @param aFunctionCall
 *            the function call to evaluate
 * @throws IllegalStateException if the rule language parser or its namespace declarations are missing, or preprocessing fails
 */
private void preprocess(final List<String> records, final FunctionCall aFunctionCall) {
    try {
        log.debug("preprocess");
        if (transformation.getRuleLanguageParser() == null) {
            throw new IllegalStateException("rulelanguageparser not initialised");
        }
        if (transformation.getRuleLanguageParser().getNamespaceDeclarations() == null) {
            throw new IllegalStateException("nsDecl is null");
        }

        // wire a fresh pre-processor with the functions it may need
        final PreProcessor preProcessor = new PreProcessor();
        preProcessor.setConvertFunction(TransformationFunctionProxy.getInstance().getConvertFunction());

        final RetrieveValue valueRetriever = new RetrieveValue();
        valueRetriever.setResourceDao(resourceDao);
        preProcessor.setRetrieveFunction(valueRetriever);

        preProcessor.setRegExprFunction(new RegularExpression());
        preProcessor.setFunctionProxy(TransformationFunctionProxy.getInstance());

        final Extract featureExtractor = new Extract();
        featureExtractor.setFeatureExtraction(featureExtraction);
        preProcessor.setExtractFunction(featureExtractor);

        final boolean preprocessingWanted = aFunctionCall.doPreprocess() || aFunctionCall.isStatic();
        if (!preprocessingWanted) {
            log.debug("skip preprocessing for function: " + aFunctionCall.getExternalFunctionName());
        } else {
            preProcessor.preprocess(
                    aFunctionCall,
                    records,
                    transformation.getRuleLanguageParser().getNamespaceDeclarations(),
                    transformation.getStaticTransformationResults(),
                    transformation.getJobProperties(),
                    transformation.getRuleLanguageParser().getVariableMappingRules());
        }
    } catch (Exception e) {
        throw new IllegalStateException(e);
    }
}
/**
 * Sets the transformation used by this object.
 *
 * @param transformation
 *            the transformation to use
 */
public void setTransformation(final ITransformation transformation) {
    this.transformation = transformation;
}

/**
 * Returns the transformation used by this object.
 *
 * @return the transformation, as previously set
 */
public ITransformation getTransformation() {
    return this.transformation;
}
/**
 * Sets the vocabulary registry.
 *
 * @param vocabularyRegistry
 *            the vocabulary registry to use
 */
public void setVocabularyRegistry(final VocabularyRegistry vocabularyRegistry) {
    this.vocabularyRegistry = vocabularyRegistry;
}

/**
 * Returns the vocabulary registry.
 *
 * @return the vocabulary registry, as previously set
 */
public VocabularyRegistry getVocabularyRegistry() {
    return this.vocabularyRegistry;
}
/**
 * Returns the resource DAO.
 *
 * @return the resource DAO, as previously set
 */
public ResourceDao getResourceDao() {
    return this.resourceDao;
}

/**
 * Sets the resource DAO.
 *
 * @param resourceDao
 *            the resource DAO to use
 */
public void setResourceDao(final ResourceDao resourceDao) {
    this.resourceDao = resourceDao;
}
/**
 * Sets the feature extraction.
 *
 * @param featureExtraction
 *            the feature extraction to use
 */
public void setFeatureExtraction(final IFeatureExtraction featureExtraction) {
    this.featureExtraction = featureExtraction;
}

/**
 * Returns the feature extraction.
 *
 * @return the feature extraction, as previously set
 */
public IFeatureExtraction getFeatureExtraction() {
    return this.featureExtraction;
}
/**
 * Returns the database connector.
 *
 * @return the database connector, as previously set
 */
public IDatabaseConnector getDatabaseConnector() {
    return this.databaseConnector;
}

/**
 * Sets the database connector.
 *
 * @param databaseConnector
 *            the database connector to use
 */
public void setDatabaseConnector(final IDatabaseConnector databaseConnector) {
    this.databaseConnector = databaseConnector;
}
/**
 * @return the current value of the transformed-records counter
 */
public long getTotalTransformedRecords() {
    return totalTransformedRecords;
}

/**
 * @return the current value of the ignored-records counter
 */
public long getTotalIgnoredRecords() {
    return totalIgnoredRecords;
}
/**
 * Returns the mapping file.
 *
 * @return the mapping file, as previously set
 */
public String getMappingFile() {
    return this.mappingFile;
}

/**
 * Sets the mapping file.
 *
 * @param mappingFile
 *            the mapping file to use
 */
public void setMappingFile(final String mappingFile) {
    this.mappingFile = mappingFile;
}
/**
 * @return the blacklist API resource, as previously set
 */
public Resource getBlacklistApi() {
    return this.blacklistApi;
}

/**
 * @param blacklistApi
 *            the blacklist API resource to use
 */
public void setBlacklistApi(Resource blacklistApi) {
    this.blacklistApi = blacklistApi;
}
}

View File

@ -0,0 +1,54 @@
package eu.dnetlib.data.collective.transformation.engine.core;
import java.util.Map;
import java.util.Properties;
import net.sf.saxon.instruct.TerminationException;
import eu.dnetlib.data.collective.transformation.TransformationException;
import eu.dnetlib.data.collective.transformation.rulelanguage.RuleLanguageParser;
/**
 * Contract of a record transformation: transforms single records and exposes the rule language
 * parser, the cached static results and the job properties that belong to it.
 *
 * @author jochen
 */
public interface ITransformation {

    /** Name of the job constant "$job.datasinkid". */
    String JOBCONST_DATASINKID = "$job.datasinkid";

    /** File name of the stylesheet that is accepted by {@link #transformRecord(String, String)}. */
    String XSLSyntaxcheckfailed = "syntaxcheckfailed.xsl";

    /**
     * Transforms a single record.
     *
     * @param aRecord
     *            the record to transform
     * @param aIndex
     *            index value made available to the transformation
     * @return the transformed record
     * @throws TerminationException
     *             if the transformation is terminated
     * @throws TransformationException
     *             if the record cannot be transformed
     */
    String transformRecord(String aRecord, int aIndex) throws TerminationException, TransformationException;

    /**
     * Transforms a single record while applying a named stylesheet.
     *
     * @param aRecord
     *            the record to transform
     * @param aStylesheet
     *            name of the stylesheet to apply
     * @return the transformed record
     * @throws TransformationException
     *             if the record cannot be transformed
     */
    String transformRecord(String aRecord, String aStylesheet) throws TransformationException;

    /**
     * Transforms a single record, passing the given stylesheet parameters.
     *
     * @param aRecord
     *            the record to transform
     * @param aStylesheetParams
     *            parameter names mapped to their values
     * @return the transformed record
     * @throws TerminationException
     *             if the transformation is terminated
     * @throws TransformationException
     *             if the record cannot be transformed
     */
    String transformRecord(String aRecord, Map<String, String> aStylesheetParams) throws TerminationException, TransformationException;

    /**
     * @return the rule language parser
     */
    RuleLanguageParser getRuleLanguageParser();

    /**
     * @return the static transformation results
     */
    Map<String, String> getStaticTransformationResults();

    /**
     * @return the job properties
     */
    Map<String, String> getJobProperties();

    /**
     * Gets log information that was recorded during transformation.
     *
     * @return properties
     */
    Properties getLogInformation();
}

View File

@ -0,0 +1,335 @@
package eu.dnetlib.data.collective.transformation.engine.core;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.Map;
import java.util.Queue;
import java.util.Set;
import javax.xml.namespace.NamespaceContext;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import eu.dnetlib.data.collective.transformation.core.schema.SchemaElement;
import eu.dnetlib.data.collective.transformation.core.schema.SchemaInspector;
import eu.dnetlib.data.collective.transformation.core.xsl.XslConstructor;
import eu.dnetlib.data.collective.transformation.core.xsl.XsltConstants;
import eu.dnetlib.data.collective.transformation.core.xsl.XslElement;
import eu.dnetlib.data.collective.transformation.rulelanguage.IRule;
import eu.dnetlib.data.collective.transformation.rulelanguage.RuleLanguageParser;
import eu.dnetlib.data.collective.transformation.rulelanguage.Rules;
import eu.dnetlib.data.collective.transformation.rulelanguage.util.Converter;
/**
* @author jochen
*
*/
public class StylesheetBuilder {
// logger of this class
private static final Log log = LogFactory.getLog(StylesheetBuilder.class);
// collaborators; createTemplate() throws an IllegalStateException while any of them is unset
private SchemaInspector schemaInspector;
private RuleLanguageParser ruleLanguageParser;
private NamespaceContext namespaceContext;
// implicit rule for deleted records: createTemplate() tests //header/@status='deleted'
private final String elementNameIndicatingDeletedRecords = "header";
private final String attributeNameIndicatingDeletedRecords = "status";
private final String attributeValueIndicatingDeletedRecords = "deleted";
// name of the schema element for which createTemplate() queues the "applyAbout" template
private final String elementNameAbout = "about";
/**
 * Creates the rule-specific stylesheet fragment as XML text.
 * <p>
 * The result is a {@code templateroot} element that contains, in this order: the variable rules,
 * one recursive template per template rule, the root template matching "/", and all queued
 * sub-templates. For every schema child element that is not of simple type a named template
 * {@code applyN} is generated; it is called from the {@code otherwise} branch of the root
 * template, and additionally from the {@code when} branch (deleted records) if the element is
 * the header element.
 *
 * @return the generated template as XML text
 * @throws IllegalStateException
 *             if schemaInspector, ruleLanguageParser or namespaceContext is not set, or if the
 *             schema has not been inspected yet
 */
public String createTemplate(){
    if (schemaInspector == null || ruleLanguageParser == null || namespaceContext == null){
        throw new IllegalStateException("StylesheetBuidler is not initialized with schemaInspector or ruleLanguageParser or namespaceContext.");
    }
    if (!schemaInspector.isInspected()){
        throw new IllegalStateException("schemaInspector must first inspect in order to create a stylesheet.");
    }
    XslElement templateRoot = new XslElement("templateroot");
    templateRoot.addBoundPrefix(XsltConstants.nsXsl);
    templateRoot.addAllBoundPrefixes(Converter.getBoundPrefixes(this.ruleLanguageParser.getNamespaceDeclarations()));
    XslElement template = new XslElement(XsltConstants.template);
    template.addAttribute("match", "/");
    Map<String, Set<IRule>> ruleMapping = this.ruleLanguageParser.getElementMappingRules();
    Map<String, IRule> variableRuleMapping = this.ruleLanguageParser.getVariableMappingRules();
    Map<String, IRule> templateRuleMapping = this.ruleLanguageParser.getTemplateMappingRules();
    // sub-templates are collected here and appended after the root template
    Queue<String> templateQueue = new LinkedList<String>();
    XslElement rootField = new XslElement(schemaInspector.getRootElement());
    XslConstructor xslConstructor = new XslConstructor();
    int templateCounter = 1;
    int standaloneTemplateCounter = 1;

    // write variables at the beginning of the templateRoot
    for (IRule currentVariableRule: variableRuleMapping.values()){
        templateRoot.addEnclosedElements(xslConstructor.writeOutVariableRule((Rules)currentVariableRule));
    }

    // write one recursive template per template rule and remember which target field it serves
    Map<String, String> targetFieldTemplateMap = new LinkedHashMap<String, String>();
    for (IRule currentTemplateRule: templateRuleMapping.values()){
        Rules templateRule = (Rules)currentTemplateRule;
        String standaloneTemplateName = "templName" + (standaloneTemplateCounter++);
        targetFieldTemplateMap.put(templateRule.getFunctionCall().getParameters().get("elementName"), standaloneTemplateName);
        templateRoot.addEnclosedElements(xslConstructor.writeOutRecursiveTemplate(templateRule, standaloneTemplateName, this.ruleLanguageParser.getNamespaceDeclarations()).asXml());
    }

    // XPath selecting the status attribute of the header element
    String deletedStatusPath = "//" + this.elementNameIndicatingDeletedRecords + "/@" + this.attributeNameIndicatingDeletedRecords;
    XslElement chooseField = new XslElement(XsltConstants.choose);
    XslElement whenField = new XslElement(XsltConstants.when,
            "test", deletedStatusPath + "='" + this.attributeValueIndicatingDeletedRecords + "'");
    XslElement otherwiseField = new XslElement(XsltConstants.otherwise);
    String templateAboutName = "applyAbout";

    // write schema elements
    for (SchemaElement element: schemaInspector.getChildElements()){
        if (element.containsSimpleType()){
            continue;
        }
        String complexTypeTemplateName = "apply" + (templateCounter++);
        XslElement complexTypeTemplate = new XslElement(XsltConstants.template, "name", complexTypeTemplateName);
        // will contain only other elements
        XslElement childField = new XslElement(XsltConstants.element, "name", element.getName());
        if (element.getName().equals(this.elementNameIndicatingDeletedRecords)){
            // copy the status attribute of the source header, if present
            XslElement ifField = new XslElement(XsltConstants.ifCondition, "test", deletedStatusPath);
            XslElement attributeField = new XslElement(XsltConstants.attribute, "name", this.attributeNameIndicatingDeletedRecords);
            XslElement valueofField = new XslElement(XsltConstants.valueOf, "select", deletedStatusPath);
            attributeField.addEnclosedElements(valueofField.asXml());
            ifField.addEnclosedElements(attributeField.asXml());
            childField.addEnclosedElements(ifField.asXml());
        }
        if (element.getName().equals(this.elementNameAbout)){
            XslElement templateAbout = new XslElement(XsltConstants.template, "name", templateAboutName);
            templateAbout.addEnclosedElements(xslConstructor.writeOutApplyAbout());
            templateQueue.add(templateAbout.asXml());
        }
        for (SchemaElement childElement: element.getChildList()){
            String currentKey = getPrefixedElementName(childElement);
            if (childElement.containsSimpleType()){
                log.debug("currentKey: " + currentKey);
                if (ruleMapping.containsKey(currentKey)){
                    for (IRule currentRule: ruleMapping.get(currentKey)){
                        if (!(currentRule instanceof Rules)){
                            // only Rules instances are supported
                            continue;
                        }
                        Rules rule = (Rules)currentRule;
                        log.debug(" has Set? " + rule.hasSet() );
                        log.debug(" has Condition? " + rule.hasCondition() );
                        if ( !rule.hasCondition() ){
                            if ( !rule.hasSet() ){
                                if (currentRule.definesTemplateMatch()){
                                    String templateName = rule.getTemplateMatch();
                                    XslElement subTemplate = new XslElement(XsltConstants.template, "match", templateName);
                                    subTemplate.addEnclosedElements(xslConstructor.writeOutRuleCopy(rule, currentKey));
                                    templateQueue.add(subTemplate.asXml());
                                    childField.addEnclosedElements(xslConstructor.writeOutApplyTemplates(rule.getProperties().getProperty("applyTemplateSelectExpression")));
                                }else{
                                    childField.addEnclosedElements(xslConstructor.writeOutRule(rule, currentKey));
                                }
                            }else{
                                childField.addEnclosedElements(xslConstructor.writeOutRuleComplex(rule, currentKey));
                            }
                        }else{
                            // has condition
                            if ( rule.getCondition().isPrimary(rule) &&
                                    rule.getUniqueName().equals( rule.getCondition().getSecondaryRule().getUniqueName() ) ){
                                if ( rule.getCondition().getApplyExpression() != null ){
                                    String templateName = "apply" + (templateCounter++);
                                    XslElement subTemplate = new XslElement(XsltConstants.template, "name", templateName);
                                    subTemplate.addEnclosedElements(xslConstructor.writeOutApplyConditionalTemplateChoose(rule));
                                    templateQueue.add(subTemplate.asXml());
                                    childField.addEnclosedElements(xslConstructor.writeOutCallTemplate(templateName));
                                }else{
                                    // a condition with alternative rules for the same output elements
                                    childField.addEnclosedElements(xslConstructor.writeOutConditionalChoose(rule));
                                }
                            }else if ( ! rule.getCondition().getPrimaryRule().getUniqueName().equals( rule.getCondition().getSecondaryRule().getUniqueName() ) ){
                                // a condition with alternative rules for distinct output elements
                                if ( rule.getCondition().getApplyExpression() != null ){
                                    // has apply expression
                                    String templateName = "apply" + (templateCounter++);
                                    XslElement subTemplate = new XslElement(XsltConstants.template, "name", templateName);
                                    subTemplate.addEnclosedElements(xslConstructor.writeOutApplyConditionalTemplateIf(rule, false));
                                    templateQueue.add(subTemplate.asXml());
                                    childField.addEnclosedElements(xslConstructor.writeOutCallTemplate(templateName));
                                }else{
                                    childField.addEnclosedElements(xslConstructor.writeOutConditionalIf(rule));
                                }
                            }
                        }
                    }
                }else if (targetFieldTemplateMap.containsKey(currentKey)){
                    childField.addEnclosedElements(xslConstructor.writeOutCallTemplate(targetFieldTemplateMap.get(currentKey)));
                }else{
                    // no rule defined, check if element is mandatory
                    if (childElement.isMandatory()){
                        XslElement emptyField = new XslElement(currentKey);
                        childField.addEnclosedElements(emptyField.asXml());
                    }
                }
            }else{
                // complex-type elements
                if (ruleMapping.containsKey(currentKey)){
                    for (IRule currentRule: ruleMapping.get(currentKey)){
                        if (!(currentRule instanceof Rules)){
                            continue;
                        }
                        Rules rule = (Rules)currentRule;
                        if ( !rule.hasCondition() ){
                            log.debug("stylesheetbuilder.complexType NO CONDITION: " + childElement.getName());
                            childField.addEnclosedElements(xslConstructor.writeOutRuleComplex(rule, currentKey));
                        }else{
                            if ( rule.getCondition().isPrimary(rule) &&
                                    rule.getUniqueName().equals( rule.getCondition().getSecondaryRule().getUniqueName() ) ){
                                if ( rule.getCondition().getApplyExpression() != null ){
                                    log.debug("APPLY expression rules for complex-type elements NOT YET SUPPORTED");
                                }else{
                                    // a condition with alternative rules for the same output elements
                                    childField.addEnclosedElements(xslConstructor.writeOutConditionalChooseComplex(rule));
                                }
                            }else if ( ! rule.getCondition().getPrimaryRule().getUniqueName().equals(
                                    rule.getCondition().getSecondaryRule().getUniqueName() ) ){
                                log.debug("CURRENTLY UNSUPPORTED!!!");
                                // a condition with alternative rules for distinct output elements
                                if ( rule.getCondition().getApplyExpression() != null ){
                                    log.debug("APPLY expression rules for complex-type elements NOT YET SUPPORTED");
                                    String templateName = "apply" + (templateCounter++);
                                    XslElement subTemplate = new XslElement(XsltConstants.template, "name", templateName);
                                    subTemplate.addEnclosedElements(xslConstructor.writeOutApplyConditionalTemplateIf(rule, true));
                                    templateQueue.add(subTemplate.asXml());
                                    childField.addEnclosedElements(xslConstructor.writeOutCallTemplate(templateName));
                                }else{
                                    childField.addEnclosedElements(xslConstructor.writeOutConditionalIfComplex(rule));
                                }
                            }
                        }
                    }
                }
            }
        }
        // optional elements without any content are left out of the stylesheet
        if ( !childField.isEmpty() || element.isMandatory() ){
            complexTypeTemplate.addEnclosedElements(childField.asXml());
            templateQueue.add(complexTypeTemplate.asXml());
            if (element.getName().equals(this.elementNameIndicatingDeletedRecords)){
                whenField.addEnclosedElements(xslConstructor.writeOutCallTemplate(complexTypeTemplateName));
            }
            otherwiseField.addEnclosedElements(xslConstructor.writeOutCallTemplate(complexTypeTemplateName));
        }
    }
    chooseField.addEnclosedElements(whenField.asXml());
    // NOTE(review): "applyAbout" is called unconditionally, but the template itself is only queued
    // when the schema contains an "about" element - confirm every supported schema has one
    otherwiseField.addEnclosedElements(xslConstructor.writeOutCallTemplate(templateAboutName));
    chooseField.addEnclosedElements(otherwiseField.asXml());
    rootField.addEnclosedElements(chooseField.asXml());
    template.addEnclosedElements(rootField.asXml());
    templateRoot.addEnclosedElements(template.asXml());
    // add sub-templates from queue
    for (String templateCode: templateQueue){
        templateRoot.addEnclosedElements(templateCode);
    }
    String stylesheetTemplate = templateRoot.asXml();
    // pretty-printing parses and re-serialises the whole template, so only do it when it is logged
    if (log.isDebugEnabled()){
        log.debug(dumpStylesheetTemplate(stylesheetTemplate));
    }
    return stylesheetTemplate;
}
/**
 * Pretty-prints a stylesheet template for logging purposes.
 * <p>
 * The template is run through an identity transformer with indentation switched on. If that
 * fails, the problem is logged with its stack trace and the template is returned unchanged, so
 * that the caller still has something meaningful to log.
 *
 * @param aTemplate
 *            the template as XML text
 * @return the indented template, or {@code aTemplate} itself if it cannot be formatted
 */
String dumpStylesheetTemplate(String aTemplate){
    StringWriter w = new StringWriter();
    Source s = new StreamSource(new StringReader(aTemplate));
    Result r = new StreamResult(w);
    try {
        Transformer t = TransformerFactory.newInstance().newTransformer();
        t.setOutputProperty(OutputKeys.METHOD, "xml");
        t.setOutputProperty(OutputKeys.INDENT, "yes");
        t.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2");
        t.transform(s, r);
        return w.toString();
    } catch (Exception e) {
        // this is a diagnostic helper only: a failure here must not look fatal
        log.warn("cannot pretty-print the stylesheet template, returning it unformatted", e);
        return aTemplate;
    }
}
/**
 * Sets the schema inspector.
 *
 * @param schemaInspector the schema inspector to use
 */
public void setSchemaInspector(SchemaInspector schemaInspector) {
    this.schemaInspector = schemaInspector;
}

/**
 * Returns the schema inspector.
 *
 * @return the schema inspector, as previously set
 */
public SchemaInspector getSchemaInspector() {
    return this.schemaInspector;
}
/**
 * Returns the rule language parser.
 *
 * @return the rule language parser, as previously set
 */
public RuleLanguageParser getRuleLanguageParser() {
    return this.ruleLanguageParser;
}

/**
 * Sets the rule language parser.
 *
 * @param ruleLanguageParser the rule language parser to use
 */
public void setRuleLanguageParser(RuleLanguageParser ruleLanguageParser) {
    this.ruleLanguageParser = ruleLanguageParser;
}
/**
 * Sets the namespace context.
 *
 * @param namespaceContext the namespace context to use
 */
public void setNamespaceContext(NamespaceContext namespaceContext) {
    this.namespaceContext = namespaceContext;
}

/**
 * Returns the namespace context.
 *
 * @return the namespace context, as previously set
 */
public NamespaceContext getNamespaceContext() {
    return this.namespaceContext;
}
/**
 * Builds the qualified name "prefix:name" of a schema element; the prefix is the one the
 * namespace context reports for the element's target namespace.
 *
 * @param aElement the schema element
 * @return the prefixed element name
 */
private String getPrefixedElementName(SchemaElement aElement){
    final String prefix = this.namespaceContext.getPrefix(aElement.getTargetNamespace());
    return prefix + ":" + aElement.getName();
}
}

View File

@ -0,0 +1,353 @@
package eu.dnetlib.data.collective.transformation.engine.core;
import java.io.Reader;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Source;
import javax.xml.transform.Templates;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import net.sf.saxon.FeatureKeys;
import net.sf.saxon.instruct.TerminationException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;
import org.springframework.core.io.Resource;
import eu.dnetlib.data.collective.transformation.TransformationException;
import eu.dnetlib.data.collective.transformation.core.schema.SchemaInspector;
import eu.dnetlib.data.collective.transformation.rulelanguage.RuleLanguageParser;
import eu.dnetlib.data.collective.transformation.rulelanguage.Rules;
import eu.dnetlib.data.collective.transformation.utils.NamespaceContextImpl;
/**
* @author jochen
*
*/
public class TransformationImpl implements
ITransformation {
// root element name handed to the schema inspector in getTransformationRules()
private static final String rootElement = "record";
private final Log log = LogFactory.getLog(TransformationImpl.class);
// base XSL template, read in init() and extended in place by createStylesheet()
private Document xslDoc;
private SAXReader reader = new SAXReader();
// transformer for regular records, created in configureTransformation()
private Transformer transformer;
// transformer for the stylesheet named by XSLSyntaxcheckfailed, created in configureTransformation()
private Transformer transformerFailed;
protected RuleLanguageParser ruleLanguageParser;
private StylesheetBuilder stylesheetBuilder;
// cache static transformation results, valid for one transformation job
private Map<String, String> staticResults = new LinkedHashMap<String, String>();
// job constants registered through addJobConstant()
private Map<String, String> jobConstantMap = new HashMap<String, String>();
@javax.annotation.Resource(name="template")
private Resource template;
private Resource schema;
// source of the stylesheet named by XSLSyntaxcheckfailed, resolved relative to the template in init()
private Source xsltSyntaxcheckFailed;
/**
 * Initializes the transformation: reads the underlying XSL template into memory and prepares the
 * source of the stylesheet named by {@code XSLSyntaxcheckfailed}, which is looked up relative to
 * the template resource.
 *
 * @throws IllegalStateException
 *             wrapping whatever went wrong while reading the resources
 */
public void init(){
    try {
        this.xslDoc = this.reader.read(this.template.getInputStream());
        final Resource failedStylesheet = this.template.createRelative(XSLSyntaxcheckfailed);
        final String failedStylesheetId = failedStylesheet.getURL().toExternalForm();
        this.xsltSyntaxcheckFailed = new StreamSource(failedStylesheet.getInputStream(), failedStylesheetId);
    } catch (Throwable e) {
        log.error("cannot initialize this transformation.", e);
        throw new IllegalStateException(e);
    }
}
/**
 * Registers a job constant; an existing constant with the same key is replaced.
 *
 * @param aKey name of the constant
 * @param aValue value of the constant
 */
public void addJobConstant(String aKey, String aValue){
    jobConstantMap.put(aKey, aValue);
}
/**
 * Creates the transformers of this transformation.
 * <p>
 * The main transformer is compiled either from the XSL stylesheet supplied by the rule language
 * parser or, if there is none, from the stylesheet generated out of the transformation rules. A
 * second transformer is compiled from the stylesheet prepared in {@link #init()}.
 *
 * @throws TransformerConfigurationException
 *             if a stylesheet cannot be compiled; when the error listener recorded a fatal
 *             error, its message and location are used as exception message and the original
 *             exception is kept as cause
 */
public void configureTransformation()throws TransformerConfigurationException{
    final List<TransformerException> errorList = new ArrayList<TransformerException>();
    javax.xml.transform.ErrorListener listener = new javax.xml.transform.ErrorListener() {
        @Override
        public void warning(TransformerException exception) throws TransformerException {
            // warnings do not stop the compilation, keep them visible for diagnosis only
            log.debug("stylesheet warning: " + exception.getMessageAndLocation());
        }
        @Override
        public void fatalError(TransformerException exception) throws TransformerException {
            errorList.add(exception);
            throw exception;
        }
        @Override
        public void error(TransformerException exception) throws TransformerException {
            // recoverable errors are not rethrown (as before), but no longer dropped silently
            log.warn("stylesheet error: " + exception.getMessageAndLocation());
        }
    };
    TransformerFactory factory = TransformerFactory.newInstance();
    factory.setAttribute(FeatureKeys.ALLOW_EXTERNAL_FUNCTIONS, Boolean.TRUE);
    factory.setErrorListener(listener);
    Templates templates = null;
    try{
        if (this.ruleLanguageParser.isXslStylesheet()){
            templates = factory.newTemplates(new StreamSource(new StringReader(ruleLanguageParser.getXslStylesheet())));
        }else{
            templates = factory.newTemplates(new StreamSource(createStylesheet()));
        }
        transformer = templates.newTransformer();
        transformer.setOutputProperty(OutputKeys.INDENT, "yes");
        transformer.setOutputProperty(OutputKeys.METHOD, "xml");
        transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2");
        Templates templateFailed = factory.newTemplates(xsltSyntaxcheckFailed);
        transformerFailed = templateFailed.newTransformer();
    }catch(TransformerConfigurationException e){
        if (errorList.isEmpty()) {
            throw e;
        }
        // todo it seems the location information is not yet correct
        String details = errorList.get(0).getMessageAndLocation();
        log.error("cannot configure the transformation: " + details, e);
        throw new TransformerConfigurationException(details, e);
    }
}
/**
 * Transforms a record, passing its index to the stylesheet as parameter "index".
 *
 * @see eu.dnetlib.data.collective.transformation.engine.core.ITransformation#transformRecord(java.lang.String, int)
 */
public String transformRecord(String record, int index)throws TerminationException, TransformationException{
    try {
        final StreamSource input = new StreamSource(new StringReader(record));
        final StringWriter output = new StringWriter();
        final StreamResult result = new StreamResult(output);
        transformer.setParameter("index", index);
        transformer.transform(input, result);
        return output.toString();
    } catch (TerminationException terminated) {
        // a termination is passed on untouched
        log.debug(terminated.getLocalizedMessage());
        throw terminated;
    } catch (TransformerException failed) {
        log.error(failed);
        throw new TransformationException(failed);
    }
}
/**
 * Transforms a record after setting every entry of {@code parameters} as stylesheet parameter.
 *
 * @param record the record to transform
 * @param parameters parameter names mapped to their values
 * @return the transformed record
 * @throws TerminationException if the transformation is terminated
 * @throws TransformationException if the transformation fails
 */
public String transformRecord(String record, Map<String, String> parameters) throws TerminationException, TransformationException{
    try {
        final StreamSource input = new StreamSource(new StringReader(record));
        final StringWriter output = new StringWriter();
        final StreamResult result = new StreamResult(output);
        for (Map.Entry<String, String> parameter: parameters.entrySet()){
            transformer.setParameter(parameter.getKey(), parameter.getValue());
        }
        transformer.transform(input, result);
        return output.toString();
    } catch (TerminationException terminated) {
        // a termination is passed on untouched
        log.debug(terminated.getLocalizedMessage());
        throw terminated;
    } catch (TransformerException failed) {
        log.error(failed);
        throw new TransformationException(failed);
    }
}
/**
 * Transforms a record with the stylesheet named by {@code XSLSyntaxcheckfailed}, which is the
 * only stylesheet this implementation supports here.
 *
 * @param record the record to transform
 * @param stylesheetName name of the stylesheet to apply
 * @return the transformed record
 * @throws IllegalArgumentException if {@code stylesheetName} is null or names another stylesheet
 * @throws TransformationException if the transformation fails
 */
public String transformRecord(String record, String stylesheetName) throws TransformationException{
    // constant first: a null name is reported as unsupported instead of raising a NullPointerException
    if (!XSLSyntaxcheckfailed.equals(stylesheetName)) {
        throw new IllegalArgumentException("in TransformationImpl: stylesheetname " + stylesheetName + " is unsupported!" );
    }
    try{
        StreamSource s = new StreamSource(new StringReader(record));
        StringWriter w = new StringWriter();
        StreamResult r = new StreamResult(w);
        transformerFailed.transform(s, r);
        return w.toString();
    }catch (TransformerException e){
        log.error(e);
        throw new TransformationException(e);
    }
}
/**
 * @return the XSL document held by this transformation, serialized as XML text
 */
public String dumpStylesheet(){
    return this.xslDoc.asXML();
}
/**
 * Sets the XSL template.
 *
 * @param template resource giving access to the XSL template
 */
public void setTemplate(Resource template) {
    this.template = template;
}

/**
 * Returns the XSL template.
 *
 * @return the resource giving access to the XSL template
 */
public Resource getTemplate() {
    return this.template;
}
/**
 * @param ruleLanguageParser the rule language parser to use
 */
public void setRuleLanguageParser(RuleLanguageParser ruleLanguageParser) {
    this.ruleLanguageParser = ruleLanguageParser;
}

/**
 * @return the rule language parser, as previously set
 */
public RuleLanguageParser getRuleLanguageParser() {
    return this.ruleLanguageParser;
}
/**
 * Sets the stylesheet builder.
 *
 * @param stylesheetBuilder the stylesheet builder to use
 */
public void setStylesheetBuilder(StylesheetBuilder stylesheetBuilder) {
    this.stylesheetBuilder = stylesheetBuilder;
}

/**
 * Returns the stylesheet builder.
 *
 * @return the stylesheet builder currently held, possibly null
 */
public StylesheetBuilder getStylesheetBuilder() {
    return this.stylesheetBuilder;
}
/**
 * Creates the stylesheet template for the transformation rules.
 * <p>
 * Every job constant is first added to the variable rules of the rule language parser, each
 * under its own key. If no stylesheet builder has been set, a default one is created from the
 * rule language parser, its namespace declarations and the inspected schema.
 *
 * @return the transformation rules as String object
 * @throws IllegalStateException if the schema cannot be inspected
 */
protected String getTransformationRules(){
    // add job-properties to the rules as variables
    for (Map.Entry<String, String> jobConstant: jobConstantMap.entrySet()){
        Rules r = new Rules();
        r.setVariable(jobConstant.getKey());
        // NOTE(review): the value is wrapped in single quotes as is - a value containing a quote
        // would break the resulting expression; confirm job constants never contain one
        r.setConstant("'" + jobConstant.getValue() + "'");
        // register under the constant's own key; using one fixed key made all constants
        // overwrite each other so that only the last one survived
        ruleLanguageParser.getVariableMappingRules().put(jobConstant.getKey(), r);
    }
    if (this.stylesheetBuilder == null){
        // create DMF compliant stylesheet builder
        this.stylesheetBuilder = new StylesheetBuilder();
        this.stylesheetBuilder.setRuleLanguageParser(this.ruleLanguageParser);
        NamespaceContextImpl namespaceContext = new NamespaceContextImpl();
        for (String prefix: ruleLanguageParser.getNamespaceDeclarations().keySet()){
            namespaceContext.addNamespace(prefix, ruleLanguageParser.getNamespaceDeclarations().get(prefix));
        }
        SchemaInspector inspector = new SchemaInspector();
        try {
            inspector.inspect(this.schema.getURL(), rootElement);
        } catch (Exception e) {
            throw new IllegalStateException(e);
        }
        this.stylesheetBuilder.setNamespaceContext(namespaceContext);
        this.stylesheetBuilder.setSchemaInspector(inspector);
    }
    return this.stylesheetBuilder.createTemplate();
}
/**
 * Creates the complete stylesheet by merging the generated rule templates into the XSL template.
 * <p>
 * The namespace declarations of the rule language parser, the {@code xsl:param} children of the
 * generated {@code templateroot} and all generated {@code xsl:template} elements are added to the
 * root element of the XSL template document. That document is modified in place, therefore:
 * don't call this method multiple times, unless transformation configuration changes, then
 * re-init and configure transformation.
 *
 * @return a reader on the stylesheet
 * @throws IllegalStateException if the generated rules cannot be parsed
 */
private Reader createStylesheet(){
    try {
        final Document generatedRules = DocumentHelper.parseText(getTransformationRules());
        for (String prefix: this.ruleLanguageParser.getNamespaceDeclarations().keySet()){
            xslDoc.getRootElement().addNamespace(prefix, this.ruleLanguageParser.getNamespaceDeclarations().get(prefix));
        }
        @SuppressWarnings("unchecked")
        List<Node> templateNodes = generatedRules.getRootElement().selectNodes("//xsl:template");
        @SuppressWarnings("unchecked")
        List<Node> paramNodes = generatedRules.getRootElement().selectNodes("/templateroot/xsl:param");
        // parameters go first, templates afterwards
        for (Node paramNode: paramNodes){
            xslDoc.getRootElement().add( ((Element)paramNode).detach() );
        }
        for (Node templateNode: templateNodes){
            xslDoc.getRootElement().add( ((Element)templateNode).detach() );
        }
    } catch (DocumentException e) {
        log.error("error in creating stylesheet: " + e);
        throw new IllegalStateException(e);
    }
    return new StringReader(xslDoc.asXML());
}
/**
 * Sets the schema.
 *
 * @param schema the schema resource to use
 */
public void setSchema(Resource schema) {
    this.schema = schema;
}

/**
 * Returns the schema.
 *
 * @return the schema resource, as previously set
 */
public Resource getSchema() {
    return this.schema;
}
/**
 * @return the map of static transformation results held by this transformation
 */
@Override
public Map<String, String> getStaticTransformationResults() {
    return staticResults;
}

/**
 * @return the map of job constants held by this transformation
 */
@Override
public Map<String, String> getJobProperties() {
    return jobConstantMap;
}
/**
 * Returns the log information of this transformation. This implementation does not record any,
 * so the result is always empty.
 *
 * @return a new, empty set of properties; never null
 */
@Override
public Properties getLogInformation() {
    // an empty result spares callers a null check
    return new Properties();
}
}

View File

@ -0,0 +1,12 @@
package eu.dnetlib.data.collective.transformation.engine.functions;
import java.util.List;
/**
 * Base class of transformation functions; it declares two record lists and the
 * {@link #execute()} operation that subclasses have to provide.
 */
public abstract class AbstractTransformationFunction implements
ITransformationFunction {
// package-private record lists; they are not read or written within this class
List<String> objectRecords;
List<String> resultRecords;
/**
 * Executes the function.
 *
 * @return the result of the execution
 * @throws ProcessingException declared for implementations to signal a failed execution
 */
abstract String execute() throws ProcessingException;
}

View File

@ -0,0 +1,74 @@
package eu.dnetlib.data.collective.transformation.engine.functions;
import java.util.LinkedList;
import java.util.List;
import javax.annotation.Resource;
import eu.dnetlib.data.collective.transformation.VocabularyRegistry;
/**
 * Transformation function that encodes field values with a vocabulary that is looked up
 * by name in the injected {@link VocabularyRegistry}.
 *
 * @author jochen
 */
public class Convert extends AbstractTransformationFunction {

    public static final String paramVocabularyName = "vocabularyName";
    public static final String paramFieldValue = "fieldValue";
    public static final String paramDefaultPattern = "defaultPattern";
    public static final String paramFunction = "function";

    @Resource
    private VocabularyRegistry vocabularyRegistry;

    /**
     * not implemented
     * @return always {@code null}
     * @see eu.dnetlib.data.collective.transformation.engine.functions.AbstractTransformationFunction#execute()
     */
    public String execute() throws ProcessingException {
        return null;
    }

    /**
     * extracts and returns the encoded value as used in the vocabulary
     * @param vocabularyName the name of the vocabulary to be used
     * @param fieldValues the list of values to normalize
     * @return encoded value
     * @throws ProcessingException if the registry does not know the vocabulary, or the encoding fails
     */
    public String executeSingleValue(String vocabularyName, List<String> fieldValues) throws ProcessingException {
        assertKnownVocabulary(vocabularyName);
        return vocabularyRegistry.getVocabulary(vocabularyName).encoding(fieldValues);
    }

    /**
     * encodes the given values once and returns that single encoding repeated for every input value
     * @param vocabularyName the name of the vocabulary to be used
     * @param fieldValues the list of values to normalize
     * @return a list holding the encoding once per input value, and at least once when the input is empty
     * @throws ProcessingException if the registry does not know the vocabulary, or the encoding fails
     */
    public List<String> executeAllValues(String vocabularyName, List<String> fieldValues) throws ProcessingException {
        assertKnownVocabulary(vocabularyName);
        List<String> computedValues = new LinkedList<String>();
        // return at least 1 value
        int numOfComputedValues = Math.max(1, fieldValues.size());
        String returnValue = vocabularyRegistry.getVocabulary(vocabularyName).encoding(fieldValues);
        for (int i = 0; i < numOfComputedValues; i++) {
            computedValues.add(returnValue);
        }
        return computedValues;
    }

    /**
     * encodes the given values using a default pattern and a filter function
     * @param vocabName the name of the vocabulary to be used
     * @param fieldValues the list of values to normalize
     * @param defaultPattern the pattern handed to the vocabulary
     * @param filterFunction the filter function handed to the vocabulary
     * @return the list of encoded values as computed by the vocabulary
     * @throws ProcessingException if the registry does not know the vocabulary, or the encoding fails
     */
    public List<String> executeFilterByParams(String vocabName, List<String> fieldValues, String defaultPattern, String filterFunction) throws ProcessingException {
        // same guard as the other execute* methods, so an unknown name is reported instead of failing later
        assertKnownVocabulary(vocabName);
        return vocabularyRegistry.getVocabulary(vocabName).encoding(fieldValues, defaultPattern, filterFunction);
    }

    public VocabularyRegistry getVocabularyRegistry() {
        return vocabularyRegistry;
    }

    public void setVocabularyRegistry(VocabularyRegistry vocabularyRegistry) {
        this.vocabularyRegistry = vocabularyRegistry;
    }

    /**
     * fails when the registry has no vocabulary registered under the given name
     */
    private void assertKnownVocabulary(String vocabularyName) throws ProcessingException {
        if (!vocabularyRegistry.getVocabularies().containsKey(vocabularyName)) {
            throw new ProcessingException("unknown vocabulary: " + vocabularyName);
        }
    }
}

View File

@ -0,0 +1,108 @@
/**
*
*/
package eu.dnetlib.data.collective.transformation.engine.functions;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Date;
//import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
//import java.util.Map;
import org.apache.oro.text.perl.Perl5Util;
/**
 * Vocabulary that normalizes date strings into the form {@code yyyy-MM-dd}.
 *
 * @author jochen
 */
public class DateVocabulary extends Vocabulary{

    private static final String filterFuncMin = "min()";
    /** zero-pads a month or day number to two digits */
    private static final String twoDigitFormat = "%02d";

    // Perl5 style match expressions, tried in this order
    private String pattern_1 = "/^(\\d{4,4}-\\d{1,2}-\\d{1,2})/"; // year-month-day, trailing text allowed
    private String pattern_2 = "/^(\\d{4,4}-\\d{1,2})$/";          // year-month
    private String pattern_3 = "/^(\\d{4,4})$/";                   // year only
    private String pattern_4 = "/^(\\d{1,2}.\\d{1,2}.\\d{4,4})$/"; // day, month, year with any single separator character
    private transient Perl5Util perl5 = new Perl5Util();

    /**
     * normalizes the first key that carries a full year-month-day date; if there is none, falls back to
     * the last year-month key (day set to 01), then the last year-only key (set to January 1st),
     * then the last day-month-year key
     * @param aKeys the candidate date strings
     * @return the normalized date, or the empty string if no key matched any pattern
     * @throws ProcessingException if a key cannot be processed, e.g. a null key
     */
    @Override
    public String encoding(List<String> aKeys) throws ProcessingException{
        String yearMonthCandidate = null;
        String yearCandidate = null;
        String dayMonthYearCandidate = null;
        String currentKey = null;
        try{
            for (String key: aKeys){
                key = key.trim();
                currentKey = key;
                if (perl5.match(pattern_1, key)){
                    // a complete date wins immediately; only the matched prefix is used
                    String[] dateSplitted = perl5.getMatch().toString().split("-");
                    return dateSplitted[0] + "-" + twoDigits(dateSplitted[1]) + "-" + twoDigits(dateSplitted[2]);
                }else if (perl5.match(pattern_2, key)){
                    String[] dateSplitted = key.split("-");
                    yearMonthCandidate = dateSplitted[0] + "-" + twoDigits(dateSplitted[1]) + "-01";
                }else if (perl5.match(pattern_3, key)){
                    yearCandidate = key + "-01-01";
                }else if (perl5.match(pattern_4, key)){
                    String[] components = key.split("[\\-\\/\\.]");
                    // ignore this key if it has less than 3 components
                    if (components.length >= 3)
                        dayMonthYearCandidate = components[2] + "-" + twoDigits(components[1]) + "-" + twoDigits(components[0]);
                }
            }
        }catch(RuntimeException e){
            // only runtime failures are wrapped; JVM errors propagate untouched
            throw new ProcessingException("Exception thrown in Datevocabulary (tried to match for value '" + currentKey + "'):", e);
        }
        if (yearMonthCandidate != null){
            return yearMonthCandidate;
        }else if (yearCandidate != null){
            return yearCandidate;
        }else if (dayMonthYearCandidate != null){
            return dayMonthYearCandidate;
        }else{
            return "";
        }
    }

    /**
     * normalizes every key on its own and re-formats it with the given pattern; when a filter function
     * is given, the result list is reduced to the single value selected by that function
     * @param aKeys the candidate date strings
     * @param aDefaultPattern a {@link SimpleDateFormat} pattern used to parse and format the normalized dates
     * @param aFilterFunction the filter function, only "min()" is supported; null or blank means no filtering
     * @return the formatted dates, keys that do not normalize to a date are left out
     * @throws ProcessingException if a normalized date cannot be parsed with the pattern or the filter function is unsupported
     */
    @Override
    public List<String> encoding(List<String> aKeys, String aDefaultPattern,
            String aFilterFunction) throws ProcessingException {
        List<String> evList = new LinkedList<String>();
        // kept local: SimpleDateFormat is mutable and must not be shared between calls
        SimpleDateFormat format = new SimpleDateFormat(aDefaultPattern);
        boolean filtering = aFilterFunction != null && aFilterFunction.trim().length() > 0;
        for (String v: aKeys){
            String ev = encoding(Arrays.asList(new String[]{v}));
            if (ev.length() > 0){
                try {
                    if (filtering && !evList.isEmpty())
                        // fold the new value into the single value kept so far
                        evList.add( filter(format.parse(ev), format.parse(evList.remove(0)), aFilterFunction, format) );
                    else
                        evList.add(format.format(format.parse(ev)));
                } catch (ParseException e) {
                    throw new ProcessingException("invalid date format: " + ev, e);
                }
            }
        }
        return evList;
    }

    /**
     * applies the filter function to two dates and returns the selected date formatted
     */
    private String filter(Date d1, Date d2, String filter, SimpleDateFormat format) throws ProcessingException{
        if (!filter.equals(filterFuncMin))
            throw new ProcessingException("unsupported filter function: " + filter);
        return format.format(d1.before(d2) ? d1 : d2);
    }

    /**
     * parses a decimal number and renders it zero-padded to two digits
     */
    private static String twoDigits(String number){
        return String.format(twoDigitFormat, Integer.parseInt(number));
    }
}

View File

@ -0,0 +1,72 @@
/**
*
*/
package eu.dnetlib.data.collective.transformation.engine.functions;
import java.io.StringReader;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Node;
import org.xml.sax.InputSource;
import eu.dnetlib.data.collective.transformation.IDatabaseConnector;
import eu.dnetlib.data.collective.transformation.TransformationException;
/**
 * Transformation function that runs an SQL expression through the configured
 * {@link IDatabaseConnector} and collects the returned records in a {@link LookupRecord}.
 *
 * @author jochen
 */
public class Dblookup extends AbstractTransformationFunction {

    public static final String paramSqlExpr = "sqlExpr";

    private IDatabaseConnector dbConnector;

    /**
     * creates a function without a database connector; set one before calling {@link #getResults(String)}
     */
    public Dblookup() {
    }

    /**
     * not implemented
     * @return always {@code null}
     * @see eu.dnetlib.data.collective.transformation.engine.functions.AbstractTransformationFunction#execute()
     */
    @Override
    String execute() throws ProcessingException {
        return null;
    }

    /**
     * @return the dbConnector
     */
    public IDatabaseConnector getDbConnector() {
        return dbConnector;
    }

    /**
     * @param dbConnector the dbConnector to set
     */
    public void setDbConnector(IDatabaseConnector dbConnector) {
        this.dbConnector = dbConnector;
    }

    /**
     * executes the SQL expression and stores, for every returned XML record, the fields
     * 'officialname' and 'id' under the record's 'accessinfopackage' field value
     * @param aSqlExpression the SQL expression handed to the database connector
     * @return the collected lookup record
     * @throws TransformationException as declared by this method; the code shown here does not throw it itself
     * @throws XPathExpressionException if a returned record cannot be evaluated
     */
    public LookupRecord getResults(String aSqlExpression) throws TransformationException, XPathExpressionException {
        final LookupRecord collected = new LookupRecord();
        final XPath evaluator = XPathFactory.newInstance().newXPath();
        for (String xmlRow : dbConnector.getResult(aSqlExpression)) {
            // parse the row once, then query the parsed tree
            final Node rowRoot = (Node) evaluator.evaluate("/", new InputSource(new StringReader(xmlRow)), XPathConstants.NODE);
            final String recordKey = evaluator.evaluate("//FIELD[@name='accessinfopackage']/text()", rowRoot);
            collected.setRecord(recordKey, "officialname", evaluator.evaluate("//FIELD[@name='officialname']/text()", rowRoot));
            collected.setRecord(recordKey, "id", evaluator.evaluate("//FIELD[@name='id']/text()", rowRoot));
        }
        return collected;
    }
}

View File

@ -0,0 +1,50 @@
/**
*
*/
package eu.dnetlib.data.collective.transformation.engine.functions;
import java.util.List;
import eu.dnetlib.data.collective.transformation.TransformationException;
/**
 * Transformation function that delegates feature extraction to the configured
 * {@link IFeatureExtraction} implementation.
 *
 * @author jochen
 */
public class Extract extends AbstractTransformationFunction {

    public static final String paramNameFeature = "feature";

    private IFeatureExtraction featureExtraction;

    /**
     * @return the featureExtraction
     */
    public IFeatureExtraction getFeatureExtraction() {
        return this.featureExtraction;
    }

    /**
     * @param featureExtraction the featureExtraction to set
     */
    public void setFeatureExtraction(IFeatureExtraction featureExtraction) {
        this.featureExtraction = featureExtraction;
    }

    /**
     * not implemented
     * @return always {@code null}
     * @see eu.dnetlib.data.collective.transformation.engine.functions.AbstractTransformationFunction#execute()
     */
    @Override
    String execute() throws ProcessingException {
        return null;
    }

    /**
     * extracts the named feature from the given object records
     * @param aObjectRecords the records handed to the feature extraction
     * @param aFeature the name of the feature
     * @return the list of results returned by the feature extraction
     * @throws ProcessingException wrapping any TransformationException raised by the extraction
     */
    public List<String> execute(List<String> aObjectRecords, String aFeature) throws ProcessingException{
        final List<String> extracted;
        try {
            extracted = featureExtraction.execute(aObjectRecords, aFeature);
        } catch (TransformationException extractionFailure) {
            throw new ProcessingException(extractionFailure);
        }
        return extracted;
    }
}

View File

@ -0,0 +1,25 @@
/**
*
*/
package eu.dnetlib.data.collective.transformation.engine.functions;
import java.util.List;
import eu.dnetlib.data.collective.transformation.TransformationException;
/**
* Contract for components that extract a named feature from a list of object records.
* @author jochen
*
*/
public interface IFeatureExtraction {
/**
* applies the extraction of a feature on objectRecords
* @param aObjectRecords the object records the feature is extracted from
* @param aFeatureName the name of the feature to extract
* @return list of extracted results
* @throws TransformationException
*/
public List<String> execute(List<String> aObjectRecords, String aFeatureName) throws TransformationException;
}

View File

@ -0,0 +1,5 @@
package eu.dnetlib.data.collective.transformation.engine.functions;
/**
* Marker interface for transformation functions; it declares no methods.
*/
public interface ITransformationFunction {
}

View File

@ -0,0 +1,31 @@
/**
*
*/
package eu.dnetlib.data.collective.transformation.engine.functions;
import java.util.List;
/**
* Contract for vocabularies that encode lists of values.
* @author jochen
*
*/
public interface IVocabulary {
/**
* return the encoding for a given list of values
* @param keys the values to encode
* @return the encoding as string
* @throws ProcessingException
*/
public String encoding(List<String> keys) throws ProcessingException;
/**
* return the encoding for a given list of values using a default pattern and applying a filter function
* @param aKeys the values to encode
* @param aDefaultPattern the default pattern; its interpretation is up to the implementation
* @param aFilterFunction the filter function; its interpretation is up to the implementation
* @return the list of encoded values
* @throws ProcessingException
*/
public List<String> encoding(List<String> aKeys, String aDefaultPattern, String aFilterFunction) throws ProcessingException;
}

View File

@ -0,0 +1,114 @@
package eu.dnetlib.data.collective.transformation.engine.functions;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.w3c.dom.Document;
import org.w3c.dom.DocumentFragment;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
/**
 * Transformation function that pulls identifiers out of node text by means of a
 * regular expression and returns them as DOM nodes.
 */
public class IdentifierExtract extends AbstractTransformationFunction{

    public static final Log log = LogFactory.getLog(IdentifierExtract.class);
    public static final String paramXpathExprJson = "xpathExprJson";
    public static final String paramXpathExprInSource = "xpathExprInputSource";
    public static final String paramRegExpr = "regExpr";

    /**
     * not implemented
     * @return always {@code null}
     */
    @Override
    String execute() throws ProcessingException {
        return null;
    }

    /**
     * extract content matched by a regular expression pattern from a given node and return matched content as a node-list
     * @param aXpathExprList xpath expressions selecting the nodes whose text is searched
     * @param aInput the node the xpath expressions are evaluated against
     * @param aRegExpression a {@link Pattern} expression; every match becomes one identifier
     * @param aDocument the document used to create the result nodes
     * @param aXpath the xpath evaluator to use
     * @return nodeList holding a single 'root' element with one 'value' child per distinct match
     * @throws ProcessingException if an xpath expression cannot be evaluated
     * @throws java.util.regex.PatternSyntaxException if the regular expression is invalid (unchecked, not wrapped)
     */
    public NodeList extract(List<String> aXpathExprList, Node aInput,
            String aRegExpression, Document aDocument, XPath aXpath) throws ProcessingException {
        log.debug("xpathExprList: " + aXpathExprList);
        log.debug("regExpr: " + aRegExpression);
        // a set, so that repeated matches yield a single value node
        Set<String> identifierSet = new HashSet<String>();
        Pattern p = Pattern.compile(aRegExpression);
        try {
            List<String> textList = extractText(aXpathExprList, aInput, aXpath);
            for (String text: textList){
                log.debug("text as input : " + text);
                Matcher m = p.matcher(text);
                while (m.find()){
                    log.debug("extracted identifier: " + m.group());
                    identifierSet.add(m.group());
                }
            }
            return toNodeList(identifierSet, aDocument);
        } catch (XPathExpressionException e) {
            // report through the logger instead of printing the stack trace to stderr
            log.error("cannot extract identifiers using xpath expressions " + aXpathExprList, e);
            throw new ProcessingException(e);
        } catch (ParserConfigurationException e) {
            log.error("cannot extract identifiers using xpath expressions " + aXpathExprList, e);
            throw new ProcessingException(e);
        }
    }

    /**
     * create a list of nodes from a list of string values
     * @param aValueSet, set of unique values
     * @param aDocument the document used as node factory
     * @return nodeList, the child nodes of a document fragment that holds one 'root' element wrapping the 'value' elements
     */
    private NodeList toNodeList(Set<String> aValueSet, Document aDocument){
        DocumentFragment dFrag = aDocument.createDocumentFragment();
        Element root = aDocument.createElement("root");
        dFrag.appendChild(root);
        for (String value: aValueSet){
            Element eVal = aDocument.createElement("value");
            eVal.setTextContent(value);
            root.appendChild(eVal);
        }
        return dFrag.getChildNodes();
    }

    /**
     * extract text from a given node using a list of given xpath expressions
     * @param aXpathExprList the xpath expressions to evaluate
     * @param aInput the node the expressions are evaluated against
     * @param aXpath the xpath evaluator to use
     * @return list of strings, the text content of every selected node
     * @throws XPathExpressionException if an expression cannot be evaluated
     * @throws ParserConfigurationException declared for callers, not raised by the current implementation
     */
    private List<String> extractText(List<String> aXpathExprList, Node aInput, XPath aXpath) throws XPathExpressionException, ParserConfigurationException{
        List<String> resultList = new LinkedList<String>();
        for (String xpathExpr: aXpathExprList){
            NodeList nodeList = (NodeList)aXpath.evaluate(xpathExpr, aInput, XPathConstants.NODESET);
            log.debug("extract text: nodelist length: " + nodeList.getLength());
            for (int i = 0; i < nodeList.getLength(); i++){
                resultList.add(nodeList.item(i).getTextContent());
            }
        }
        return resultList;
    }
}

View File

@ -0,0 +1,34 @@
/**
*
*/
package eu.dnetlib.data.collective.transformation.engine.functions;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
 * Transformation function 'lookup'. The class only carries the parameter names;
 * {@link #execute()} is not implemented.
 *
 * @author jochen
 */
public class Lookup extends AbstractTransformationFunction {

    public static final Log log = LogFactory.getLog(Lookup.class);

    public static final String paramExprIdentifier = "exprIdentifier";
    public static final String paramExprProperty = "exprProperty";

    /**
     * creates the function; there is no state to initialise
     */
    public Lookup() {
    }

    /**
     * not implemented
     * @return always {@code null}
     * @see eu.dnetlib.data.collective.transformation.engine.functions.AbstractTransformationFunction#execute()
     */
    @Override
    String execute() throws ProcessingException {
        return null;
    }
}

View File

@ -0,0 +1,33 @@
/**
*
*/
package eu.dnetlib.data.collective.transformation.engine.functions;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Properties;
/**
 * Keeps one set of string properties per record key, in insertion order of the record keys.
 *
 * @author jochen
 */
public class LookupRecord {

    private HashMap<String, Properties> recordMap = new LinkedHashMap<String, Properties>();

    /**
     * stores a property for a record, creating the record on first use
     * @param aRecordKey the key of the record
     * @param aPropertyKey the name of the property
     * @param aPropertyValue the value of the property
     */
    public void setRecord(String aRecordKey, String aPropertyKey, String aPropertyValue){
        final Properties existing = recordMap.get(aRecordKey);
        if (existing != null){
            existing.setProperty(aPropertyKey, aPropertyValue);
            return;
        }
        // the record is registered only after its first property has been accepted
        final Properties fresh = new Properties();
        fresh.setProperty(aPropertyKey, aPropertyValue);
        recordMap.put(aRecordKey, fresh);
    }

    /**
     * looks up a property of a record
     * @param aRecordKey the key of the record
     * @param aPropertyKey the name of the property
     * @return the property value, or "UNKNOWN" if either the record or the property is missing
     */
    public String getPropertyValue(String aRecordKey, String aPropertyKey){
        final Properties properties = recordMap.get(aRecordKey);
        return properties == null ? "UNKNOWN" : properties.getProperty(aPropertyKey, "UNKNOWN");
    }
}

View File

@ -0,0 +1,26 @@
/**
*
*/
package eu.dnetlib.data.collective.transformation.engine.functions;
import java.util.List;
import prototype.Person;
/**
 * Vocabulary that normalizes person names by means of {@link Person}.
 *
 * @author jochen
 */
public class PersonVocabulary extends Vocabulary{

    /**
     * normalizes every given name in turn and keeps the result of the last one
     * @param keys the person names to normalize
     * @return the normalised full name of the last key, or the empty string if there are no keys
     */
    @Override
    public String encoding(List<String> keys)throws ProcessingException{
        String normalised = "";
        for (String fullname : keys){
            normalised = new Person(fullname).getNormalisedFullname();
        }
        return normalised;
    }
}

View File

@ -0,0 +1,46 @@
/**
*
*/
package eu.dnetlib.data.collective.transformation.engine.functions;
/**
 * Checked exception raised by the transformation functions of this package.
 *
 * @author jochen
 */
public class ProcessingException extends Exception {

    private static final long serialVersionUID = -8648116731979859467L;

    /**
     * creates an exception without message and cause
     */
    public ProcessingException() {
        super();
    }

    /**
     * creates an exception with a message
     * @param message the detail message
     */
    public ProcessingException(String message) {
        super(message);
    }

    /**
     * creates an exception wrapping another throwable
     * @param cause the throwable that caused this exception
     */
    public ProcessingException(Throwable cause) {
        super(cause);
    }

    /**
     * creates an exception with a message, wrapping another throwable
     * @param message the detail message
     * @param cause the throwable that caused this exception
     */
    public ProcessingException(String message, Throwable cause) {
        super(message, cause);
    }
}

View File

@ -0,0 +1,60 @@
/**
*
*/
package eu.dnetlib.data.collective.transformation.engine.functions;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.oro.text.perl.MalformedPerl5PatternException;
import org.apache.oro.text.perl.Perl5Util;
/**
 * Transformation function that applies a Perl5 style expression to a value, using
 * Jakarta ORO's {@link Perl5Util}.
 *
 * @author jochen
 */
public class RegularExpression extends AbstractTransformationFunction {

    public static final Log log = LogFactory.getLog(RegularExpression.class);
    public static final String paramRegularExpr = "regularExpression";
    public static final String paramExpr1 = "expr1";
    public static final String paramExpr2 = "expr2";

    private Perl5Util util = new Perl5Util();

    /**
     * not implemented
     * @return always {@code null}
     * @see eu.dnetlib.data.collective.transformation.engine.functions.AbstractTransformationFunction#execute()
     */
    @Override
    String execute() throws ProcessingException {
        return null;
    }

    /**
     * applies the expression to a single value. Three forms are distinguished by the prefix of the expression:
     * <ul>
     * <li>'s/': substitution, the substituted value is returned</li>
     * <li>'m/': match, the first group of the match is returned</li>
     * <li>anything else: match, the lower-cased first group and the third group are returned joined by '_'</li>
     * </ul>
     * @param aRegularExpression the Perl5 expression
     * @param aExpr1 the value the expression is applied to
     * @param aExpr2 not used
     * @return the computed value; the empty string if a match expression does not match
     * @throws ProcessingException if the expression is malformed, in any of the three forms
     */
    public String executeSingleValue(String aRegularExpression, String aExpr1, String aExpr2) throws ProcessingException{
        try{
            if (aRegularExpression.startsWith("s/")){
                return util.substitute(aRegularExpression, aExpr1);
            }
            if (!util.match(aRegularExpression, aExpr1)){
                return "";
            }
            if (aRegularExpression.startsWith("m/")){
                return util.group(1);
            }
            // assume match and extract
            String funder = util.group(1).toLowerCase();
            String projectId = util.group(3);
            return funder + "_" + projectId;
        }catch(MalformedPerl5PatternException patternExc){
            // match and substitute both reject malformed expressions; report them uniformly
            log.fatal("aRegularExpression: " + aRegularExpression);
            log.fatal("aExpr1: " + aExpr1);
            log.fatal(patternExc.getMessage());
            throw new ProcessingException(patternExc);
        }
    }
}

View File

@ -0,0 +1,157 @@
/**
*
*/
package eu.dnetlib.data.collective.transformation.engine.functions;
import java.io.StringReader;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import javax.xml.namespace.NamespaceContext;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.xml.sax.InputSource;
import eu.dnetlib.common.profile.Resource;
import eu.dnetlib.common.profile.ResourceDao;
import eu.dnetlib.data.collective.transformation.rulelanguage.Argument;
/**
 * Transformation function that retrieves a value either from a resource profile or from the system clock.
 *
 * @author jochen
 */
public class RetrieveValue extends AbstractTransformationFunction {

    public static final Log log = LogFactory.getLog(RetrieveValue.class);
    public static final String paramFunctionName = "functionName";
    public static final String paramFunctionProfileId = "functionParameterProfileId";
    public static final String paramFunctionExpr = "functionParameterExpr";

    public enum FUNCTION {PROFILEFIELD, CURRENTDATE};

    @javax.annotation.Resource
    private ResourceDao resourceDao;

    /**
     * not implemented
     * @return always {@code null}
     * @see eu.dnetlib.data.collective.transformation.engine.functions.AbstractTransformationFunction#execute()
     */
    @Override
    String execute() throws ProcessingException {
        return null;
    }

    /**
     * evaluates one of the supported {@link FUNCTION}s
     * @param functionName the name of a {@link FUNCTION} constant
     * @param arguments the function arguments; PROFILEFIELD requires exactly two: the profile reference and an xpath expression
     * @param objRecord the current record, used to resolve input-field arguments
     * @param namespaceMap handed on to the xpath evaluation of input-field arguments
     * @return the retrieved value
     * @throws ProcessingException if the arguments are invalid or the profile cannot be resolved
     * @throws IllegalArgumentException if functionName does not name a {@link FUNCTION} constant
     */
    public String executeSingleValue(String functionName, List<Argument> arguments, String objRecord, Map<String, String> namespaceMap) throws ProcessingException{
        FUNCTION function = FUNCTION.valueOf(functionName);
        switch(function){
        case PROFILEFIELD:
            return retrieveProfileField(functionName, arguments, objRecord, namespaceMap);
        case CURRENTDATE:
            return currentDate();
        default:
            // unsupported
            return "";
        }
    }

    /**
     * resolves the profile referenced by the first argument and evaluates the second argument on it
     */
    private String retrieveProfileField(String functionName, List<Argument> arguments, String objRecord, Map<String, String> namespaceMap) throws ProcessingException{
        if (arguments.size() != 2){
            throw new ProcessingException("invalid number of arguments - required 2 but found :" + arguments.size());
        }
        Argument reference = arguments.get(0);
        String arg = "";
        Resource resource = null;
        try{
            boolean resolvable = false;
            if (reference.isValue()){
                arg = reference.getArgument();
                log.debug("retrieve value arg isValue: " + arg);
                resolvable = true;
            }else if (reference.isInputField()){
                arg = evaluateXpath(objRecord, reference.getArgument(), namespaceMap);
                log.debug("retrieve value arg isInputField: " + arg);
                resolvable = true;
            }else if (reference.isJobConst()){
                // TODO
            }else if (reference.isVariable()){
                // TODO
                log.warn("RETRIEVEVALUE: support for variables not yet implemented.");
            }
            if (resolvable){
                if (arg.startsWith("collection(")) { // xquery
                    arg = StringEscapeUtils.unescapeXml(arg);
                    resource = resourceDao.getResourceByQuery(arg); // query
                }else{
                    resource = resourceDao.getResource(arg); // profile id
                }
            }
        }catch(Exception e){
            throw new ProcessingException(e);
        }
        if (resource == null){
            throw new ProcessingException("invalid profileId: " + arg + "; functionName: " + functionName + ", arg1: " + arguments.get(0).getArgument() + ", arg2: " + arguments.get(1).getArgument());
        }
        return resource.getValue(arguments.get(1).getArgument()); // xpath expr
    }

    /**
     * formats the current time as yyyy-MM-dd'T'HH:mm:ss'Z'
     */
    private String currentDate(){
        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'"); // TODO format string
        // the pattern appends a literal 'Z', so the time itself has to be rendered in UTC
        dateFormat.setTimeZone(java.util.TimeZone.getTimeZone("UTC"));
        return dateFormat.format(new Date());
    }

    /**
     * @return the resourceDao
     */
    public ResourceDao getResourceDao() {
        return resourceDao;
    }

    /**
     * @param resourceDao the resourceDao to set
     */
    public void setResourceDao(ResourceDao resourceDao) {
        this.resourceDao = resourceDao;
    }

    /**
     * evaluates an xpath expression on a record given as XML string
     * @param record the XML record
     * @param xpathExpr the expression to evaluate
     * @param nsMap currently not consulted; the prefixes dri, dr, dc, oaf and prov are hard-wired
     * @return the string value of the expression
     * @throws IllegalStateException if the expression cannot be evaluated
     */
    private String evaluateXpath(String record, String xpathExpr, Map<String, String> nsMap){
        XPath xpath = XPathFactory.newInstance().newXPath();
        xpath.setNamespaceContext(new NamespaceContext() {
            @Override
            public Iterator getPrefixes(String namespaceURI) {
                return null;
            }
            @Override
            public String getPrefix(String namespaceURI) {
                // reverse lookup is not supported
                return null;
            }
            @Override
            public String getNamespaceURI(String prefix) {
                if ("dri".equals(prefix)){
                    return "http://www.driver-repository.eu/namespace/dri";
                }else if ("dr".equals(prefix)){
                    return "http://www.driver-repository.eu/namespace/dr";
                }else if ("dc".equals(prefix)){
                    return "http://purl.org/dc/elements/1.1/";
                }else if ("oaf".equals(prefix)){
                    return "http://namespace.openaire.eu/oaf";
                }else if ("prov".equals(prefix)){
                    return "http://www.openarchives.org/OAI/2.0/provenance";
                }
                return "";
            }
        });
        try {
            return xpath.evaluate(xpathExpr, new InputSource(new StringReader(record)));
        } catch (XPathExpressionException e) {
            log.fatal("cannot evaluate xpath");
            throw new IllegalStateException(e);
        }
    }
}

View File

@ -0,0 +1,86 @@
/**
*
*/
package eu.dnetlib.data.collective.transformation.engine.functions;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.springframework.util.StringUtils;
/**
 * Transformation function that splits values into tokens, either all at once or one token
 * per call through a queue kept per call id.
 *
 * @author js
 */
public class Split extends AbstractTransformationFunction {

    public static final Log log = LogFactory.getLog(Split.class);
    public static final String paramInputExpr = "inputExpr";
    public static final String paramRegExpr = "regExpr";
    public static final String paramElementName = "elementName";

    /** pending tokens per call id */
    private Map<String, Queue<String>> queueMap = new HashMap<String, Queue<String>>();

    /**
     * not implemented
     * @return always {@code null}
     * @see eu.dnetlib.data.collective.transformation.engine.functions.AbstractTransformationFunction#execute()
     */
    @Override
    String execute() throws ProcessingException {
        return null;
    }

    /**
     * split a given list of values using the given delimiters
     * @param aInputValues the values to split
     * @param aRegExpr the delimiter characters; despite the name this is handed to
     *        {@link StringUtils#tokenizeToStringArray(String, String, boolean, boolean)}, which treats
     *        every character as a delimiter of its own and not as a regular expression
     * @return the collection of all values splitted, tokens trimmed and empty tokens left out
     */
    public Collection<String> executeAllValues(List<String> aInputValues, String aRegExpr) throws ProcessingException{
        Collection<String> result = new LinkedList<String>();
        for (String value: aInputValues){
            result.addAll(tokenize(value, aRegExpr));
        }
        return result;
    }

    /**
     * split a given list of values stored in an internal queue and return the element from the head of the queue.
     * The values are only split on the first call for a call id; later calls with the same id keep draining the queue.
     * @param aInputValues the values to split
     * @param aRegExpr the delimiter characters, see {@link #executeAllValues(List, String)}
     * @param aCallId the id that identifies the queue
     * @return the next token, or {@code null} once the queue is drained (the queue is then discarded)
     * @throws ProcessingException declared for callers, not raised by the current implementation
     */
    public String executeSingleValue(List<String> aInputValues, String aRegExpr, String aCallId) throws ProcessingException{
        if (!queueMap.containsKey(aCallId)){
            Queue<String> queue = new LinkedList<String>();
            queueMap.put(aCallId, queue);
            for (String value: aInputValues){
                queue.addAll(tokenize(value, aRegExpr));
            }
        }
        return pollQueue(aCallId);
    }

    /**
     * return the next element of the queue of a call id
     * @param aCallId the id that identifies the queue
     * @return the next token, or {@code null} if the queue is drained or no queue exists for the call id
     * @throws ProcessingException declared for callers, not raised by the current implementation
     */
    public String executeSingleValue(String aCallId) throws ProcessingException{
        return pollQueue(aCallId);
    }

    /**
     * takes the head of the queue of a call id and discards the queue once it is empty
     */
    private String pollQueue(String aCallId){
        Queue<String> queue = queueMap.get(aCallId);
        if (queue == null){
            // unknown id, or the queue was already drained and removed
            return null;
        }
        String result = queue.poll();
        if (result == null){
            queueMap.remove(aCallId);
        }
        return result;
    }

    /**
     * tokenizes one value, trimming tokens and dropping empty ones
     */
    private static List<String> tokenize(String aValue, String aDelimiters){
        return Arrays.asList(StringUtils.tokenizeToStringArray(aValue, aDelimiters, true, true));
    }
}

View File

@ -0,0 +1,209 @@
/**
*
*/
package eu.dnetlib.data.collective.transformation.engine.functions;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import org.dom4j.Node;
import eu.dnetlib.common.profile.Resource;
import eu.dnetlib.common.utils.XMLUtils;
/**
 * Vocabulary that maps term names, codes and synonyms to the code of their term.
 * The mapping is built when a resource is set; changing case sensitivity or the delimiter
 * afterwards does not rebuild it.
 *
 * @author jochen
 */
public class Vocabulary implements IVocabulary{

    private List<Term> terms;
    private Map<String, String> encodingMap;
    private Resource resource;
    private boolean isCaseSensitive = true;
    private String delimiter = null;
    private String name = null;

    /**
     * @return the terms
     */
    public List<Term> getTerms() {
        return terms;
    }

    /**
     * @param terms the terms to set
     */
    public void setTerms(List<Term> terms) {
        this.terms = terms;
    }

    /**
     * @return the name
     */
    public String getName() {
        return name;
    }

    /**
     * @param name the name to set
     */
    public void setName(String name) {
        this.name = name;
    }

    /**
     * @return the value of //VOCABULARY_NAME of the resource profile; requires a resource profile to be set
     */
    public String getVocabularyName(){
        return resource.getValue("//VOCABULARY_NAME");
    }

    /**
     * returns the normalized, encoded String for a given key if found, otherwise a special value -depending on the vocabulary- is returned indicating that it couldn't be normalized
     * @param keys a list of Strings to encode; the first one known to the vocabulary wins
     * @return a normalized, encoded String; if no key is known, the code of the term 'Unknown' or else of the
     *         term 'Undetermined' (lower-cased when the vocabulary is case-insensitive), or {@code null} if neither exists
     */
    @Override
    public String encoding(List<String> keys)throws ProcessingException{
        // take the first best
        for (String key: keys){
            key = key.trim();
            if (!isCaseSensitive)
                key = key.toLowerCase();
            if (encodingMap.containsKey(key))
                return encodingMap.get(key);
        }
        // only the spelling that fits the case mode is tried, so a hit on the other spelling
        // can no longer hide an existing 'Undetermined' term
        String unknownKey = isCaseSensitive ? "Unknown" : "unknown";
        if (encodingMap.containsKey(unknownKey)){
            return encodingMap.get(unknownKey);
        }
        return encodingMap.get(isCaseSensitive ? "Undetermined" : "undetermined");
    }

    class Term{
        String code;
        String name;
        List<String> synonyms = new LinkedList<String>();

        void addSynonym(String synonym){
            synonyms.add(synonym);
        }

        List<String> getSynonyms(){
            return synonyms;
        }
    }

    /**
     * init the encoding with the given list of term parameters
     * @param aTermList list of parameters with expected key:value pairs 'name':string, 'code':string, 'synonyms':list<string>;
     *        a missing 'synonyms' entry is treated as no synonyms
     */
    @SuppressWarnings("unchecked")
    public void setResource(List<Map<String, ?>> aTermList){
        terms = new LinkedList<Term>();
        for (Map<String, ?> termMap : aTermList){
            Term t = new Term();
            terms.add(t);
            t.name = (String)termMap.get("name");
            t.code = (String)termMap.get("code");
            List<String> synonyms = (List<String>)termMap.get("synonyms");
            if (synonyms != null){
                for (String synonym: synonyms)
                    t.addSynonym(synonym);
            }
        }
        setCode();
    }

    /**
     * init the encoding with term parameters from a vocabulary resource profile
     * @param resource the profile; terms are read from //TERMS/*, with attributes english_name and code
     *        and synonyms from SYNONYMS/SYNONYM/@term
     * @throws IllegalStateException if a term cannot be read from the profile
     */
    public void setResource(Resource resource) {
        this.resource = resource;
        terms = new LinkedList<Term>();
        List<Node> nodes = resource.getNodeList("//TERMS/*");
        // terms and synonyms are addressed by their 1-based position in the document
        int index = 1;
        for (Node n: nodes){
            Term t = new Term();
            terms.add(t);
            try {
                t.name = XMLUtils.getNode(n, "//TERM[" + index + "]/@english_name").getText();
                t.code = XMLUtils.getNode(n, "//TERM[" + index + "]/@code").getText();
                List<Node> nsynonyms = XMLUtils.getNodes(n, "//TERM[" + index + "]/SYNONYMS/*");
                int indexSynonyms = 1;
                for (Node nsynonym: nsynonyms){
                    String synonymTerm = XMLUtils.getNode(nsynonym, "//TERM[" + index + "]//SYNONYM[" + indexSynonyms + "]/@term").getText();
                    t.addSynonym(synonymTerm);
                    indexSynonyms++;
                }
            } catch (Exception e) {
                throw new IllegalStateException(e);
            }
            index++;
        }
        setCode();
    }

    /**
     * rebuilds the encoding map from the terms: name, code, the parts of the code split by the delimiter (if set)
     * and all synonyms point to the code of their term; keys are lower-cased when the vocabulary is case-insensitive
     */
    private void setCode(){
        encodingMap = new TreeMap<String, String>();
        for (Term t: terms){
            if (isCaseSensitive){
                encodingMap.put(t.name, t.code);
                encodingMap.put(t.code, t.code);
            }else{
                encodingMap.put(t.name.toLowerCase(), t.code);
                encodingMap.put(t.code.toLowerCase(), t.code);
            }
            if (this.delimiter != null){
                String[] splittedEncodings = t.code.split(this.delimiter);
                for (String encoding: splittedEncodings){
                    if (isCaseSensitive){
                        encodingMap.put(encoding, t.code);
                    }else{
                        encodingMap.put(encoding.toLowerCase(), t.code);
                    }
                }
            }
            for (String synonym : t.synonyms){
                if (isCaseSensitive) encodingMap.put(synonym, t.code);
                else encodingMap.put(synonym.toLowerCase(), t.code);
            }
        }
    }

    public Resource getResource() {
        return resource;
    }

    /**
     * @param isCaseSensitive whether keys are compared case-sensitively; only affects resources set afterwards
     */
    public void setCaseSensitive(boolean isCaseSensitive) {
        this.isCaseSensitive = isCaseSensitive;
    }

    public boolean isCaseSensitive() {
        return isCaseSensitive;
    }

    /**
     * @param delimiter the delimiter to set, used as regular expression to split term codes; only affects resources set afterwards
     */
    public void setDelimiter(String delimiter) {
        this.delimiter = delimiter;
    }

    /**
     * @return the delimiter
     */
    public String getDelimiter() {
        return delimiter;
    }

    /**
     * not supported by this vocabulary
     * @throws ProcessingException always
     */
    @Override
    public List<String> encoding(List<String> aKeys, String aDefaultPattern,
            String aFilterFunction) throws ProcessingException {
        throw new ProcessingException("no implementation of filtered encoding.");
    }
}

View File

@ -0,0 +1,38 @@
package eu.dnetlib.data.collective.transformation.rulelanguage;
/**
 * An argument of a rule-language function: the argument text together with the {@link Type}
 * that tells how the text is meant.
 *
 * @author jochen
 */
public class Argument {

    public enum Type {VALUE, INPUTFIELD, JOBCONST, VAR};

    private final Type type;
    private final String argument;

    /**
     * @param aType the type of the argument
     * @param aArgument the argument text
     */
    public Argument(Type aType, String aArgument) {
        this.type = aType;
        this.argument = aArgument;
    }

    // enum constants are compared by identity, which also copes with an argument created without type

    /**
     * @return true if the argument is of type VALUE
     */
    public boolean isValue(){
        return this.type == Type.VALUE;
    }

    /**
     * @return true if the argument is of type INPUTFIELD
     */
    public boolean isInputField(){
        return this.type == Type.INPUTFIELD;
    }

    /**
     * @return true if the argument is of type JOBCONST
     */
    public boolean isJobConst(){
        return this.type == Type.JOBCONST;
    }

    /**
     * @return true if the argument is of type VAR
     */
    public boolean isVariable(){
        return this.type == Type.VAR;
    }

    /**
     * @return the argument text
     */
    public String getArgument(){
        return this.argument;
    }
}

View File

@ -0,0 +1,76 @@
package eu.dnetlib.data.collective.transformation.rulelanguage;
/**
 * A condition of the rule language: a condition expression, an apply expression and the
 * primary and secondary rule attached to it.
 *
 * @author jochen
 */
public class Condition {

    private String applyExpression;
    private String conditionExpression;
    private Rules primaryRule;
    private Rules secondaryRule;

    /**
     * @return the applyExpression
     */
    public String getApplyExpression() {
        return this.applyExpression;
    }

    /**
     * @param applyExpression the applyExpression to set
     */
    public void setApplyExpression(String applyExpression) {
        this.applyExpression = applyExpression;
    }

    /**
     * @return the conditionExpression
     */
    public String getConditionExpression() {
        return this.conditionExpression;
    }

    /**
     * @param conditionExpression the conditionExpression to set
     */
    public void setConditionExpression(String conditionExpression) {
        this.conditionExpression = conditionExpression;
    }

    /**
     * @param aRule the rule to check, must not be null
     * @return true if the given rule equals the primary rule of this condition
     */
    public boolean isPrimary(Rules aRule){
        return aRule.equals(this.primaryRule);
    }

    /**
     * @return the primaryRule
     */
    public Rules getPrimaryRule() {
        return this.primaryRule;
    }

    /**
     * @param primaryRule the primaryRule to set
     */
    public void setPrimaryRule(Rules primaryRule) {
        this.primaryRule = primaryRule;
    }

    /**
     * @return the secondaryRule
     */
    public Rules getSecondaryRule() {
        return this.secondaryRule;
    }

    /**
     * @param secondaryRule the secondaryRule to set
     */
    public void setSecondaryRule(Rules secondaryRule) {
        this.secondaryRule = secondaryRule;
    }
}

View File

@ -0,0 +1,27 @@
package eu.dnetlib.data.collective.transformation.rulelanguage;
/**
* Query interface of a rule of the transformation rule language.
* @author jochen
*
*/
public interface IRule {
/**
* @return the unique name of the rule
*/
public String getUniqueName();
/**
* @return true if the rule has a condition
*/
public boolean hasCondition();
/**
* returns true when the rule has pending rules that set an element
* @return true if it has a ruleSet, false otherwise
*/
public boolean hasSet();
/**
* @return true if the rule defines a variable
*/
public boolean definesVariable();
/**
* @return true if the rule defines a target field
*/
public boolean definesTargetField();
/**
* @return true if the rule defines a template
*/
public boolean definesTemplate();
/**
* @return true if the rule defines a template match
*/
public boolean definesTemplateMatch();
}

View File

@ -0,0 +1,129 @@
package eu.dnetlib.data.collective.transformation.rulelanguage;
import java.io.InputStream;
import java.io.Reader;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import eu.dnetlib.data.collective.transformation.rulelanguage.parser.ASTStart;
import eu.dnetlib.data.collective.transformation.rulelanguage.parser.FtScript;
import eu.dnetlib.data.collective.transformation.rulelanguage.parser.ParseException;
import eu.dnetlib.data.collective.transformation.rulelanguage.parser.SimpleNode;
import eu.dnetlib.data.collective.transformation.rulelanguage.parser.ASTMyScript.SCRIPTTYPE;
import eu.dnetlib.data.collective.transformation.rulelanguage.util.FunctionCall;
import eu.dnetlib.data.collective.transformation.rulelanguage.visitor.AbstractVisitor;
import eu.dnetlib.data.collective.transformation.rulelanguage.visitor.RuleLanguageVisitor;
/**
 * Parser for rule scripts.
 * <p>
 * A script is handed to the shared {@link FtScript} parser, the resulting syntax tree is walked
 * with a {@link RuleLanguageVisitor}, and everything the visitor collected is exposed through
 * the getters of this class.
 *
 * @author jochen
 */
public class RuleLanguageParser {

    private static final Log log = LogFactory.getLog(RuleLanguageParser.class);

    /** Visitor that gathers rules, declarations and function calls during traversal. */
    private RuleLanguageVisitor visitor = new RuleLanguageVisitor();

    /** Script parser shared by all instances of this class; created by the first parse call. */
    protected static FtScript scriptParser = null;

    private String xslStylesheet = null;

    /**
     * Parses the script read from the given stream and traverses its syntax tree.
     *
     * @param inStream source of the script text
     * @throws IllegalStateException if the script cannot be parsed
     */
    public void parse(InputStream inStream){
        if (scriptParser == null) {
            scriptParser = new FtScript(inStream);
        }
        // the parser is re-initialised on every call, also right after its creation
        scriptParser.ReInit(inStream);
        parsingAndTraversing();
    }

    /**
     * Parses the script read from the given reader and traverses its syntax tree.
     *
     * @param inRead source of the script text
     * @throws IllegalStateException if the script cannot be parsed
     */
    public void parse(Reader inRead){
        if (scriptParser == null) {
            scriptParser = new FtScript(inRead);
        }
        scriptParser.ReInit(inRead);
        parsingAndTraversing();
    }

    /**
     * Runs the start production of the shared parser and visits every node of the result.
     * A parse failure is logged and rethrown as an IllegalStateException carrying the cause.
     */
    private void parsingAndTraversing(){
        try {
            traverseTree(scriptParser.Start(), visitor);
        } catch (ParseException e) {
            log.error(e);
            throw new IllegalStateException(e);
        }
    }

    public String getScriptName(){
        return this.visitor.getScriptName();
    }

    public SCRIPTTYPE getScriptType(){
        return this.visitor.getScriptType();
    }

    public Map<String, Set<IRule>> getElementMappingRules(){
        return this.visitor.getElementMappingRules();
    }

    public Map<String, IRule> getVariableMappingRules(){
        return this.visitor.getVariableMappingRules();
    }

    public Map<String, IRule> getTemplateMappingRules(){
        return this.visitor.getTemplateMappingRules();
    }

    public List<String> getImportedScripts(){
        return this.visitor.getImportedScripts();
    }

    public Map<String, String> getNamespaceDeclarations(){
        return this.visitor.getNamespaceDeclarations();
    }

    public List<FunctionCall> getFunctionCalls(){
        return this.visitor.getFunctionCalls();
    }

    public List<Map<String, String>> getPreprocessings(){
        return this.visitor.getPreprocessings();
    }

    public String getXslStylesheet() {
        return this.xslStylesheet;
    }

    public void setXslStylesheet(String xslStylesheet) {
        this.xslStylesheet = xslStylesheet;
    }

    /**
     * @return true if an XSL stylesheet has been set on this parser, false otherwise
     */
    public boolean isXslStylesheet(){
        return this.xslStylesheet != null;
    }

    /**
     * adds the rules and name-space declarations from another parser, e.g. a child parser of imported scripts, to this parser
     * <p>
     * Function calls are appended; the map-based collections are merged with putAll, so an
     * entry of the other parser replaces an entry of this parser stored under the same key.
     *
     * @param aParser the parser whose collected data is copied into this one
     */
    public void addRulesFromParser(RuleLanguageParser aParser){
        final RuleLanguageVisitor target = this.visitor;
        target.getFunctionCalls().addAll(aParser.getFunctionCalls());
        target.getElementMappingRules().putAll(aParser.getVisitor().getElementMappingRules());
        target.getVariableMappingRules().putAll(aParser.getVisitor().getVariableMappingRules());
        target.getTemplateMappingRules().putAll(aParser.getVisitor().getTemplateMappingRules());
        target.getNamespaceDeclarations().putAll(aParser.getNamespaceDeclarations());
    }

    protected RuleLanguageVisitor getVisitor(){
        return this.visitor;
    }

    /**
     * Depth-first walk: each child accepts the visitor before its own children are visited.
     * The given node itself is not visited, only its descendants.
     */
    private void traverseTree(SimpleNode node, AbstractVisitor visitor){
        int index = 0;
        while (index < node.jjtGetNumChildren()) {
            SimpleNode child = (SimpleNode) node.jjtGetChild(index);
            child.jjtAccept(visitor, null);
            traverseTree(child, visitor);
            index++;
        }
    }
}

View File

@ -0,0 +1,316 @@
package eu.dnetlib.data.collective.transformation.rulelanguage;
import java.util.Properties;
import eu.dnetlib.data.collective.transformation.core.schema.SchemaElement;
import eu.dnetlib.data.collective.transformation.rulelanguage.util.FunctionCall;
/**
 * A single rule of the transformation rule language.
 * <p>
 * A rule is identified either by a variable, a template or a target field (see
 * {@link #getUniqueName()}) and carries the value source (xpath, constant, function call,
 * assignment variable) together with optional extras such as a condition or a set of
 * pending rules. A rule cannot be both a variable and a target field; the respective
 * setters enforce this.
 *
 * @author jochen
 */
public class Rules implements Comparable<Rules>, IRule{

    /** Rule declaration value that marks a rule as static. */
    public static final String staticRule = "static";

    private String attribute = "";
    private String targetField = "";
    private String ruleDeclaration = "";
    private String xpath = "";
    private String constant = "";
    private String namespace = "";
    private String variable = "";
    private String template = "";
    private String templateMatch = "";
    private FunctionCall funcCall;
    private Condition condition;
    private boolean isEmpty = false;
    private boolean isSkip = false;
    private SchemaElement targetSchemaElement;
    private String assignmentVariable = "";
    private RulesSet rulesSet;
    private Properties properties = new Properties();

    public Rules() {
    }

    /**
     * indicates if the rule is declared as static
     * @return true if static, false otherwise
     */
    public boolean isStatic(){
        // constant on the left keeps the check safe when the declaration was set to null
        return staticRule.equals(ruleDeclaration);
    }

    /**
     * indicates if the rule defines a variable
     * @return true if variable is defined, false otherwise
     * @see eu.dnetlib.data.collective.transformation.rulelanguage.IRule#definesVariable()
     */
    @Override
    public boolean definesVariable(){
        return isSet(variable);
    }

    /**
     * indicates if the rule defines a target field
     * @return true if a non-empty target field is set, false otherwise
     */
    @Override
    public boolean definesTargetField(){
        return isSet(targetField);
    }

    /**
     * checks if this rule defines an attribute
     * @return true if defines attribute else false
     */
    public boolean definesAttribute(){
        return isSet(attribute);
    }

    /**
     * indicates if the rule defines a template
     * @return true if a non-empty template is set, false otherwise
     */
    @Override
    public boolean definesTemplate(){
        return isSet(template);
    }

    /**
     * indicates if the rule defines a template match
     * @return true if a non-empty template match is set, false otherwise
     */
    @Override
    public boolean definesTemplateMatch() {
        return isSet(templateMatch);
    }

    public void setXpath(String xpath) {
        this.xpath = xpath;
    }

    public String getXpath() {
        return xpath;
    }

    /**
     * sets the argument aVariable as the value of the rule
     * @param aVariable the variable as a reference to the value
     */
    public void setAssignmentVariable(String aVariable){
        this.assignmentVariable = aVariable;
    }

    public String getAssignmentVariable(){
        return this.assignmentVariable;
    }

    public void setNamespace(String namespace) {
        this.namespace = namespace;
    }

    public String getNamespace() {
        return namespace;
    }

    public void setConstant(String constant) {
        this.constant = constant;
    }

    public String getConstant() {
        return constant;
    }

    /**
     * Sets the target field of this rule.
     *
     * @param targetField the name of the output element
     * @throws IllegalStateException if the rule already defines a variable
     * @deprecated marked deprecated in the original source; no replacement setter is documented in this class
     */
    @Deprecated
    public void setTargetField(String targetField) {
        if (isSet(this.variable)){
            throw new IllegalStateException("Invalid rule definition: a rule is either defined as an output element or as a variable");
        }
        this.targetField = targetField;
    }

    /**
     * @return the target field
     * @deprecated replaced by {@link #getUniqueName()}
     */
    @Deprecated
    public String getTargetField() {
        return targetField;
    }

    public void setRuleDeclaration(String ruleDeclaration) {
        this.ruleDeclaration = ruleDeclaration;
    }

    public String getRuleDeclaration() {
        return ruleDeclaration;
    }

    /**
     * Orders two rules by their target field, variable, template, template match, rule
     * declaration, namespace, constant and xpath, in that order; the first differing field
     * decides.
     * <p>
     * Returns 0 exactly when all eight fields are equal. For differing rules the sign is
     * antisymmetric ({@code a.compareTo(b)} and {@code b.compareTo(a)} have opposite signs),
     * as required by {@link Comparable}; a result that is always -1 breaks sorted collections
     * and sorting. Note that equals/hashCode are not overridden by this class, so the ordering
     * is not consistent with equals.
     *
     * @param o the rule to compare with; must not be null
     * @return a negative value, zero or a positive value
     * @see java.lang.Comparable#compareTo(java.lang.Object)
     */
    @Override
    public int compareTo(Rules o) {
        int result = compareField(this.targetField, o.targetField);
        if (result == 0) result = compareField(this.variable, o.variable);
        if (result == 0) result = compareField(this.template, o.template);
        if (result == 0) result = compareField(this.templateMatch, o.templateMatch);
        if (result == 0) result = compareField(this.ruleDeclaration, o.ruleDeclaration);
        if (result == 0) result = compareField(this.namespace, o.namespace);
        if (result == 0) result = compareField(this.constant, o.constant);
        if (result == 0) result = compareField(this.xpath, o.xpath);
        return result;
    }

    public void setFunctionCall(FunctionCall funcCall) {
        this.funcCall = funcCall;
    }

    public FunctionCall getFunctionCall() {
        return funcCall;
    }

    /**
     * @return the variable if one is defined, otherwise the template if one is defined,
     *         otherwise the target field
     */
    @Override
    public String getUniqueName() {
        if (this.definesVariable()) return this.variable;
        else if (this.definesTemplate()) return this.template;
        return this.targetField;
    }

    @Override
    public boolean hasCondition() {
        return condition != null;
    }

    /**
     * @return the condition
     */
    public Condition getCondition() {
        return condition;
    }

    /**
     * @param condition the condition to set
     */
    public void setCondition(Condition condition) {
        this.condition = condition;
    }

    /**
     * @param variable the variable to set
     * @throws IllegalStateException if the rule already defines a target field
     */
    public void setVariable(String variable) {
        if (isSet(this.targetField)){
            throw new IllegalStateException("Invalid rule definition: a rule is either defined as an output element or as a variable");
        }
        this.variable = variable;
    }

    /**
     * @return the variable
     */
    public String getVariable() {
        return variable;
    }

    /**
     * @param isEmpty the isEmpty to set
     */
    public void setEmpty(boolean isEmpty) {
        this.isEmpty = isEmpty;
    }

    /**
     * @return the isEmpty
     */
    public boolean isEmpty() {
        return isEmpty;
    }

    /**
     * @param targetSchemaElement the targetSchemaElement to set
     */
    public void setTargetSchemaElement(SchemaElement targetSchemaElement) {
        this.targetSchemaElement = targetSchemaElement;
    }

    /**
     * @return the targetSchemaElement
     */
    public SchemaElement getTargetSchemaElement() {
        return targetSchemaElement;
    }

    /**
     * @return the template
     */
    public String getTemplate() {
        return template;
    }

    /**
     * @param template the template to set
     */
    public void setTemplate(String template) {
        this.template = template;
    }

    /**
     * @return the attribute
     */
    public String getAttribute() {
        return attribute;
    }

    /**
     * @param attribute the attribute to set
     */
    public void setAttribute(String attribute) {
        this.attribute = attribute;
    }

    /**
     * @return the rulesSet
     */
    public RulesSet getRulesSet() {
        return rulesSet;
    }

    /**
     * @param rulesSet the rulesSet to set
     */
    public void setRulesSet(RulesSet rulesSet) {
        this.rulesSet = rulesSet;
    }

    /* (non-Javadoc)
     * @see eu.dnetlib.data.collective.transformation.rulelanguage.IRule#hasSet()
     */
    @Override
    public boolean hasSet() {
        return rulesSet != null;
    }

    public String getTemplateMatch() {
        return templateMatch;
    }

    public void setTemplateMatch(String templateMatch) {
        this.templateMatch = templateMatch;
    }

    public Properties getProperties() {
        return properties;
    }

    public void setProperties(Properties properties) {
        this.properties = properties;
    }

    public boolean isSkip() {
        return isSkip;
    }

    public void setSkip(boolean isSkip) {
        this.isSkip = isSkip;
    }

    /** Tells whether a string-valued rule property carries a value (non-null and non-empty). */
    private static boolean isSet(String value) {
        return value != null && value.length() > 0;
    }

    /** Null-tolerant string comparison used by compareTo; null sorts before any non-null value. */
    private static int compareField(String left, String right) {
        if (left == null) {
            return (right == null) ? 0 : -1;
        }
        if (right == null) {
            return 1;
        }
        return left.compareTo(right);
    }
}

View File

@ -0,0 +1,29 @@
/**
*
*/
package eu.dnetlib.data.collective.transformation.rulelanguage;
import java.util.LinkedList;
import java.util.List;
/**
 * Holder for a list of pending {@link Rules}. The list starts out empty and can be
 * replaced as a whole; the getter hands out the internal list itself, not a copy.
 *
 * @author jochen
 */
public class RulesSet {

    private List<Rules> pendingRules = new LinkedList<Rules>();

    /**
     * @return the pendingRules
     */
    public List<Rules> getPendingRules() {
        return this.pendingRules;
    }

    /**
     * @param pendingRules the pendingRules to set
     */
    public void setPendingRules(List<Rules> pendingRules) {
        this.pendingRules = pendingRules;
    }
}

View File

@ -0,0 +1,59 @@
/* Generated By:JJTree: Do not edit this line. ASTMyAssign.java Version 4.3 */
/* JavaCCOptions:MULTI=true,NODE_USES_PARSER=false,VISITOR=true,TRACK_TOKENS=false,NODE_PREFIX=AST,NODE_EXTENDS=,NODE_FACTORY=,SUPPORT_CLASS_VISIBILITY_PUBLIC=true */
package eu.dnetlib.data.collective.transformation.rulelanguage.parser;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
 * Syntax tree node for an assignment. It records either an attribute value (stored
 * without its first and last character) or a field expression, plus a flag for each.
 */
public class ASTMyAssign extends AbstractNode {

    private static final Log log = LogFactory.getLog(ASTMyAssign.class);

    String value = "";
    String field = "";
    boolean isField = false;
    boolean isAttribute = false;

    public ASTMyAssign(int id) {
        super(id);
    }

    public ASTMyAssign(FtScript p, int id) {
        super(p, id);
    }

    /** Accept the visitor. **/
    public Object jjtAccept(FtScriptVisitor visitor, Object data) {
        return visitor.visit(this, data);
    }

    /**
     * Marks this node as an attribute assignment. A non-empty argument is stored with its
     * first and last character removed (the surrounding quotes); an empty argument leaves
     * the stored value untouched.
     *
     * @param value the attribute value including its enclosing quote characters
     */
    public void setAttribute(String value) {
        if (!value.isEmpty()) {
            // unquote
            int lastIndex = value.length() - 1;
            this.value = value.substring(1, lastIndex);
            log.debug("attribute unquoted: " + this.value);
        }
        this.isAttribute = true;
    }

    /**
     * Stores the field expression and marks this node as a field assignment.
     *
     * @param field the field expression
     */
    public void setFieldExpression(String field){
        this.isField = true;
        this.field = field;
    }

    public boolean isFieldExpression(){
        return this.isField;
    }

    public boolean isAttribute(){
        return this.isAttribute;
    }

    public String getFieldExpression(){
        return this.field;
    }

    public String getValue() {
        return this.value;
    }
}
/* JavaCC - OriginalChecksum=f78d0265ec643fa70ae75afa6b875501 (do not edit this line) */

View File

@ -0,0 +1,38 @@
/* Generated By:JJTree: Do not edit this line. ASTMyAttribute.java Version 4.3 */
/* JavaCCOptions:MULTI=true,NODE_USES_PARSER=false,VISITOR=true,TRACK_TOKENS=false,NODE_PREFIX=AST,NODE_EXTENDS=,NODE_FACTORY=,SUPPORT_CLASS_VISIBILITY_PUBLIC=true */
package eu.dnetlib.data.collective.transformation.rulelanguage.parser;
/**
 * Syntax tree node for an attribute; carries an attribute value and an input field,
 * both plain strings that are null until set.
 */
public class ASTMyAttribute extends SimpleNode {

    private String value;
    private String inputField;

    public ASTMyAttribute(int id) {
        super(id);
    }

    public ASTMyAttribute(FtScript p, int id) {
        super(p, id);
    }

    /** Accept the visitor. **/
    public Object jjtAccept(FtScriptVisitor visitor, Object data) {
        return visitor.visit(this, data);
    }

    /**
     * @param value the attribute value to store
     */
    public void setAttributeValue(String value) {
        this.value = value;
    }

    /**
     * @return the stored attribute value
     */
    public String getAttributeValue(){
        return this.value;
    }

    /**
     * @return the stored input field
     */
    public String getAttributeInputField() {
        return this.inputField;
    }

    /**
     * @param inputField the input field to store
     */
    public void setAttributeInputField(String inputField) {
        this.inputField = inputField;
    }
}
/* JavaCC - OriginalChecksum=13918b66ed87534be49661a37cadd261 (do not edit this line) */

View File

@ -0,0 +1,69 @@
/* Generated By:JJTree: Do not edit this line. ASTMyCondition.java Version 4.3 */
/* JavaCCOptions:MULTI=true,NODE_USES_PARSER=false,VISITOR=true,TRACK_TOKENS=false,NODE_PREFIX=AST,NODE_EXTENDS=,NODE_FACTORY=,SUPPORT_CLASS_VISIBILITY_PUBLIC=true */
package eu.dnetlib.data.collective.transformation.rulelanguage.parser;
import eu.dnetlib.data.collective.transformation.rulelanguage.Rules;
/**
 * Syntax tree node for a condition. Holds an apply expression, a conditional expression
 * and the two rules (primary and secondary) attached to the condition.
 */
public class ASTMyCondition extends SimpleNode {

    String applyExpression = "";
    String conditionalExpression = "";
    Rules primaryRule;
    Rules secondaryRule;

    public ASTMyCondition(int id) {
        super(id);
    }

    public ASTMyCondition(FtScript p, int id) {
        super(p, id);
    }

    /** Accept the visitor. **/
    public Object jjtAccept(FtScriptVisitor visitor, Object data) {
        return visitor.visit(this, data);
    }

    /**
     * @return the applyExpression
     */
    public String getApplyExpression() {
        return this.applyExpression;
    }

    /**
     * @param applyExpression the applyExpression to set
     */
    public void setApplyExpression(String applyExpression) {
        this.applyExpression = applyExpression;
    }

    /**
     * @return the conditionalExpression
     */
    public String getConditionalExpression() {
        return this.conditionalExpression;
    }

    /**
     * @param conditionalExpression the conditionalExpression to set
     */
    public void setConditionalExpression(String conditionalExpression) {
        this.conditionalExpression = conditionalExpression;
    }

    /**
     * @return the primaryRule
     */
    public Rules getPrimaryRule() {
        return this.primaryRule;
    }

    /**
     * @param primaryRule the primaryRule to set
     */
    public void setPrimaryRule(Rules primaryRule) {
        this.primaryRule = primaryRule;
    }

    /**
     * @return the secondaryRule
     */
    public Rules getSecondaryRule() {
        return this.secondaryRule;
    }

    /**
     * @param secondaryRule the secondaryRule to set
     */
    public void setSecondaryRule(Rules secondaryRule) {
        this.secondaryRule = secondaryRule;
    }
}
/* JavaCC - OriginalChecksum=952b24322923d1de519b8698f1217414 (do not edit this line) */

View File

@ -0,0 +1,47 @@
/* Generated By:JJTree: Do not edit this line. ASTMyCopy.java Version 4.3 */
/* JavaCCOptions:MULTI=true,NODE_USES_PARSER=false,VISITOR=true,TRACK_TOKENS=false,NODE_PREFIX=AST,NODE_EXTENDS=,NODE_FACTORY=,SUPPORT_CLASS_VISIBILITY_PUBLIC=true */
package eu.dnetlib.data.collective.transformation.rulelanguage.parser;
import eu.dnetlib.data.collective.transformation.rulelanguage.util.Converter;
/**
 * Syntax tree node for the copy statement. Stores a template match name, an
 * apply-template select expression and a copy select expression.
 */
public class ASTMyCopy extends AbstractNode {

    private String templateMatchName = "";
    private String applyTemplateSelectExpression = "";
    private String copySelectExpression = "";

    public ASTMyCopy(int id) {
        super(id);
    }

    public ASTMyCopy(FtScript p, int id) {
        super(p, id);
    }

    /** Accept the visitor. **/
    public Object jjtAccept(FtScriptVisitor visitor, Object data) {
        return visitor.visit(this, data);
    }

    /**
     * Records the three arguments of a copy statement. Each argument is passed through
     * {@link Converter#getUnquotedString(String)} before it is stored.
     *
     * @param templateMatchName the template match name as written in the script
     * @param applyTemplateSelectExpression the select expression for apply-templates as written in the script
     * @param copySelectExpression the select expression for the copy as written in the script
     */
    public void copy(String templateMatchName, String applyTemplateSelectExpression, String copySelectExpression) {
        final String matchName = Converter.getUnquotedString(templateMatchName);
        this.templateMatchName = matchName;
        final String applySelect = Converter.getUnquotedString(applyTemplateSelectExpression);
        this.applyTemplateSelectExpression = applySelect;
        final String copySelect = Converter.getUnquotedString(copySelectExpression);
        this.copySelectExpression = copySelect;
    }

    public String getTemplateMatchName() {
        return this.templateMatchName;
    }

    public String getApplyTemplateSelectExpression() {
        return this.applyTemplateSelectExpression;
    }

    public String getCopySelectExpression() {
        return this.copySelectExpression;
    }
}
/* JavaCC - OriginalChecksum=0d1889e307d1bb558c977ae924f6bb37 (do not edit this line) */

View File

@ -0,0 +1,32 @@
/* Generated By:JJTree: Do not edit this line. ASTMyEmpty.java Version 4.3 */
/* JavaCCOptions:MULTI=true,NODE_USES_PARSER=false,VISITOR=true,TRACK_TOKENS=false,NODE_PREFIX=AST,NODE_EXTENDS=,NODE_FACTORY=,SUPPORT_CLASS_VISIBILITY_PUBLIC=true */
package eu.dnetlib.data.collective.transformation.rulelanguage.parser;
/**
 * Syntax tree node that carries a single "empty" flag, false until set.
 */
public class ASTMyEmpty extends AbstractNode {

    boolean isEmpty = false;

    public ASTMyEmpty(int id) {
        super(id);
    }

    public ASTMyEmpty(FtScript p, int id) {
        super(p, id);
    }

    /** Accept the visitor. **/
    public Object jjtAccept(FtScriptVisitor visitor, Object data) {
        return visitor.visit(this, data);
    }

    /**
     * @return the current value of the empty flag
     */
    public boolean isEmpty(){
        return this.isEmpty;
    }

    /**
     * @param aIsEmpty the new value of the empty flag
     */
    public void setEmpty(boolean aIsEmpty) {
        this.isEmpty = aIsEmpty;
    }
}
/* JavaCC - OriginalChecksum=3d979737222aaa0a33a774b3e718705f (do not edit this line) */

View File

@ -0,0 +1,32 @@
/* Generated By:JJTree: Do not edit this line. ASTMyImport.java Version 4.3 */
/* JavaCCOptions:MULTI=true,NODE_USES_PARSER=false,VISITOR=true,TRACK_TOKENS=false,NODE_PREFIX=AST,NODE_EXTENDS=,NODE_FACTORY=,SUPPORT_CLASS_VISIBILITY_PUBLIC=true */
package eu.dnetlib.data.collective.transformation.rulelanguage.parser;
/**
 * Syntax tree node for an import statement; stores the name of the imported script
 * exactly as it is handed in.
 */
public class ASTMyImport extends SimpleNode {

    private String scriptName = "";

    public ASTMyImport(int id) {
        super(id);
    }

    public ASTMyImport(FtScript p, int id) {
        super(p, id);
    }

    /** Accept the visitor. **/
    public Object jjtAccept(FtScriptVisitor visitor, Object data) {
        return visitor.visit(this, data);
    }

    /**
     * @param scriptName the name of the imported script
     */
    public void setScriptName(String scriptName) {
        this.scriptName = scriptName;
    }

    /**
     * @return the name of the imported script, empty if none was set
     */
    public String getScriptName(){
        return scriptName;
    }
}
/* JavaCC - OriginalChecksum=ab107c30c540374469a393ab442757d5 (do not edit this line) */

View File

@ -0,0 +1,42 @@
/* Generated By:JJTree: Do not edit this line. ASTMyNs.java Version 4.3 */
/* JavaCCOptions:MULTI=true,NODE_USES_PARSER=false,VISITOR=true,TRACK_TOKENS=false,NODE_PREFIX=AST,NODE_EXTENDS=,NODE_FACTORY=,SUPPORT_CLASS_VISIBILITY_PUBLIC=true */
package eu.dnetlib.data.collective.transformation.rulelanguage.parser;
/**
 * Syntax tree node for a namespace declaration, consisting of a prefix and a URI.
 *
 * @author jochen
 */
public class ASTMyNs extends SimpleNode {

    private String nsPrefix;
    private String nsUri;

    public ASTMyNs(int id) {
        super(id);
    }

    public ASTMyNs(FtScript p, int id) {
        super(p, id);
    }

    /** Accept the visitor. **/
    public Object jjtAccept(FtScriptVisitor visitor, Object data) {
        return visitor.visit(this, data);
    }

    /**
     * Stores the prefix as given and the URI without its quotes: the text between the
     * first and the last double-quote character of aNsUri is kept.
     *
     * @param aNsPrefix the namespace prefix
     * @param aNsUri the namespace URI enclosed in double quotes
     */
    public void setNsDeclaration(String aNsPrefix, String aNsUri){
        this.nsPrefix = aNsPrefix;
        final int begin = aNsUri.indexOf("\"") + 1;
        final int end = aNsUri.lastIndexOf("\"");
        this.nsUri = aNsUri.substring(begin, end);
    }

    public String getNsPrefix(){
        return nsPrefix;
    }

    public String getNsUri(){
        return nsUri;
    }
}
/* JavaCC - OriginalChecksum=9d6d617b7c3f22f3603fcbd13f738170 (do not edit this line) */

View File

@ -0,0 +1,249 @@
/* Generated By:JJTree: Do not edit this line. ASTMyOp.java Version 4.3 */
/* JavaCCOptions:MULTI=true,NODE_USES_PARSER=false,VISITOR=true,TRACK_TOKENS=false,NODE_PREFIX=AST,NODE_EXTENDS=,NODE_FACTORY=,SUPPORT_CLASS_VISIBILITY_PUBLIC=true */
package eu.dnetlib.data.collective.transformation.rulelanguage.parser;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import eu.dnetlib.data.collective.transformation.engine.functions.Convert;
import eu.dnetlib.data.collective.transformation.engine.functions.Extract;
import eu.dnetlib.data.collective.transformation.engine.functions.IdentifierExtract;
import eu.dnetlib.data.collective.transformation.engine.functions.Lookup;
import eu.dnetlib.data.collective.transformation.engine.functions.RegularExpression;
import eu.dnetlib.data.collective.transformation.engine.functions.Split;
import eu.dnetlib.data.collective.transformation.rulelanguage.Rules;
import eu.dnetlib.data.collective.transformation.rulelanguage.util.Converter;
import eu.dnetlib.data.collective.transformation.rulelanguage.util.FunctionCall;
/**
 * Syntax tree node for a function (operation) used in a rule, e.g. getValue, convert,
 * extract, regExpr, split, lookup, concat or identifierExtract.
 * <p>
 * Each configuring method records which function was requested, the external function name
 * and the function parameters; {@link #createFunctionCall(boolean)} turns that state into a
 * {@link FunctionCall}. Parameters are kept in a LinkedHashMap, so they retain the order in
 * which they were put.
 *
 * @author jochen
 */
public class ASTMyOp extends AbstractNode {

    private boolean isFunGetValue = false;
    private boolean isFunConvert = false;
    private boolean isFunConvertString = false;
    private boolean isFunRegExpr = false;
    private boolean isFunExtract = false;
    private boolean isFunSplit = false;
    private boolean isFunLookup = false;
    private boolean isFunConcat = false;
    private boolean isFunIdentifierExtract = false;
    private boolean doPreprocess = true;
    private String functionName = null;
    private Map<String, String> paramMap = null;
    private List<Rules> rulesList = new LinkedList<Rules>();
    private List<String> concatList = new LinkedList<String>();
    private String externalFunctionName = null;

    public ASTMyOp(int id) {
        super(id);
    }

    public ASTMyOp(FtScript p, int id) {
        super(p, id);
    }

    /** Accept the visitor. **/
    public Object jjtAccept(FtScriptVisitor visitor, Object data) {
        return visitor.visit(this, data);
    }

    /**
     * Configures this node as a getValue call.
     *
     * @param functionName the name of the function whose value is requested
     */
    public void getValue(String functionName){
        this.functionName = functionName;
        this.isFunGetValue = true;
        this.externalFunctionName = "getValue";
        paramMap = new LinkedHashMap<String, String>();
        paramMap.put("functionName", functionName);
    }

    public String getFunctionName(){
        return functionName;
    }

    /**
     * creates a new FunctionCall object
     * @param aIsStatic passed on to the FunctionCall constructor
     * @return a function call object
     */
    public FunctionCall createFunctionCall(boolean aIsStatic){
        FunctionCall fc = new FunctionCall(aIsStatic, this.doPreprocess);
        fc.setExternalFunctionName(externalFunctionName);
        fc.setParameters(paramMap);
        fc.setParamList(concatList);
        return fc;
    }

    public boolean isGetValue(){
        return isFunGetValue;
    }

    public boolean isConvert(){
        return isFunConvert;
    }

    /**
     * @return true if {@link #convert(String, String, String, String)} selected the
     *         'convertString' variant, false otherwise
     */
    public boolean isConvertString(){
        // must read the flag: calling isConvertString() here recurses until the stack overflows
        return isFunConvertString;
    }

    public boolean isRegExpr(){
        return isFunRegExpr;
    }

    public boolean isExtract(){
        return isFunExtract;
    }

    public boolean isFunSplit(){
        return isFunSplit;
    }

    public boolean isLookup(){
        return isFunLookup;
    }

    public boolean isConcat(){
        return isFunConcat;
    }

    /**
     * Configures this node as a convert call. Depending on the input field either the
     * 'convertString' or the 'convert' external function is selected.
     *
     * @param inputField xpath expression naming the value to convert
     * @param vocabulary name of the vocabulary
     * @param defaultPattern quoted default pattern; ignored when blank
     * @param function quoted function; ignored when blank
     */
    public void convert(String inputField, String vocabulary, String defaultPattern, String function){
        // evaluate the inputField arg first then decide to apply 'convert' or 'convertString'
        String converterInputField = Converter.getXpathFromXpathExpr(inputField);
        if (Converter.isXpathReturningString(converterInputField)){
            this.externalFunctionName = "convertString";
            this.isFunConvertString = true;
        }else{
            this.externalFunctionName = "convert";
            this.isFunConvert = true;
        }
        this.doPreprocess = false;
        paramMap = new LinkedHashMap<String, String>();
        paramMap.put(Convert.paramFieldValue, converterInputField);
        paramMap.put(Convert.paramVocabularyName, vocabulary);
        if (defaultPattern.trim().length() > 0)
            paramMap.put(Convert.paramDefaultPattern, stripQuotes(defaultPattern));
        if (function.trim().length() > 0)
            paramMap.put(Convert.paramFunction, stripQuotes(function));
    }

    /**
     * Configures this node as an extract call.
     *
     * @param feature the feature name, stored unchanged
     */
    public void extract(String feature){
        this.isFunExtract = true;
        this.externalFunctionName = "extract";
        paramMap = new LinkedHashMap<String, String>();
        paramMap.put(Extract.paramNameFeature, feature);
    }

    /**
     * Configures this node as a regExpr call.
     *
     * @param inputExpr1 first input expression; an "xpath:" expression is converted to its xpath
     * @param inputExpr2 second input expression; an "xpath:" expression is converted to its xpath
     * @param regularExpr the regular expression enclosed in double quotes
     */
    public void regExpr(String inputExpr1, String inputExpr2, String regularExpr){
        this.isFunRegExpr = true;
        this.externalFunctionName = "regExpr";
        this.doPreprocess = false;
        paramMap = new LinkedHashMap<String, String>();
        paramMap.put(RegularExpression.paramExpr1, resolveXpath(inputExpr1));
        paramMap.put(RegularExpression.paramExpr2, resolveXpath(inputExpr2));
        paramMap.put(RegularExpression.paramRegularExpr, stripQuotes(regularExpr));
    }

    public Map<String, String> getParamMap(){
        return paramMap;
    }

    /**
     * split values of an element into multiple elements
     * @param inputValue the input value; an "xpath:" expression is converted to its xpath
     * @param elementName the name of the target element, enclosed in double quotes
     * @param regularExpr regular expression (delimiter), enclosed in double quotes
     */
    public void split(String inputValue, String elementName, String regularExpr) {
        this.isFunSplit = true;
        this.doPreprocess = false;
        this.externalFunctionName = "split";
        paramMap = new LinkedHashMap<String, String>();
        paramMap.put(Split.paramInputExpr, resolveXpath(inputValue));
        paramMap.put(Split.paramRegExpr, stripQuotes(regularExpr));
        paramMap.put(Split.paramElementName, stripQuotes(elementName));
    }

    /**
     * Adds a rule to the list of rules kept by this node.
     *
     * @param aRule the rule to add
     */
    public void addRule(Rules aRule) {
        rulesList.add(aRule);
    }

    /**
     * set parameters for the lookup function
     * @param aExprId identifier expression; an "xpath:" expression is converted to its xpath
     * @param aExprProperty property expression including its enclosing quote characters;
     *        an empty string adds no property parameter
     */
    public void lookup(String aExprId, String aExprProperty) {
        this.isFunLookup = true;
        this.doPreprocess = false;
        this.externalFunctionName = "lookup";
        paramMap = new LinkedHashMap<String, String>();
        paramMap.put(Lookup.paramExprIdentifier, resolveXpath(aExprId));
        if (aExprProperty.length() > 0){
            // unquote: drop the first and the last character
            paramMap.put(Lookup.paramExprProperty, aExprProperty.substring(1, aExprProperty.length() - 1));
        }
    }

    /**
     * Configures this node as a concat call; the terms are supplied via {@link #addConcat(String)}.
     */
    public void concat(){
        this.isFunConcat = true;
        this.doPreprocess = true;
        this.externalFunctionName = "concat";
    }

    /**
     * @param aTerm a term to append to the concat parameter list
     */
    public void addConcat(String aTerm) {
        concatList.add(aTerm);
    }

    /**
     * Configures this node as an identifierExtract call.
     *
     * @param aXpathExprJsonString quoted JSON string of xpath expressions
     * @param aXpathExprInSource expression in the source; an "xpath:" expression is converted to its xpath
     * @param aRegExpr quoted regular expression
     */
    public void identifierExtract(String aXpathExprJsonString, String aXpathExprInSource,
            String aRegExpr) {
        this.isFunIdentifierExtract = true;
        this.doPreprocess = false;
        this.externalFunctionName = "identifierExtract";
        paramMap = new LinkedHashMap<String, String>();
        paramMap.put(IdentifierExtract.paramXpathExprInSource, resolveXpath(aXpathExprInSource));
        paramMap.put(IdentifierExtract.paramXpathExprJson, Converter.getUnquotedString(aXpathExprJsonString));
        paramMap.put(IdentifierExtract.paramRegExpr, Converter.getUnquotedString(aRegExpr));
    }

    /**
     * @return the isFunIdentifierExtract
     */
    public boolean isFunIdentifierExtract() {
        return isFunIdentifierExtract;
    }

    /** Returns the text between the first and the last double-quote character of the argument. */
    private static String stripQuotes(String quoted) {
        return quoted.substring(quoted.indexOf("\"") + 1, quoted.lastIndexOf("\""));
    }

    /** Converts an expression starting with "xpath:" to its xpath; any other expression is returned unchanged. */
    private static String resolveXpath(String expression) {
        if (expression.startsWith("xpath:")) {
            return Converter.getXpathFromXpathExpr(expression);
        }
        return expression;
    }
}
/* JavaCC - OriginalChecksum=3d515ff3345fb356c3993ac1bf1d77cc (do not edit this line) */

View File

@ -0,0 +1,56 @@
/* Generated By:JJTree: Do not edit this line. ASTMyPreprocess.java Version 4.3 */
/* JavaCCOptions:MULTI=true,NODE_USES_PARSER=false,VISITOR=true,TRACK_TOKENS=false,NODE_PREFIX=AST,NODE_EXTENDS=,NODE_FACTORY=,SUPPORT_CLASS_VISIBILITY_PUBLIC=true */
package eu.dnetlib.data.collective.transformation.rulelanguage.parser;
/**
 * Syntax tree node for a preprocess statement: an optional id, a function name and an
 * optional parameter. All three are null until one of the preprocess methods sets them.
 */
public class ASTMyPreprocess extends SimpleNode {

    private String id;
    private String funcName;
    private String parameter;

    public ASTMyPreprocess(int id) {
        super(id);
    }

    public ASTMyPreprocess(FtScript p, int id) {
        super(p, id);
    }

    /** Accept the visitor. **/
    public Object jjtAccept(FtScriptVisitor visitor, Object data) {
        return visitor.visit(this, data);
    }

    public String getFunctionName(){
        return funcName;
    }

    public String getParameter(){
        return parameter;
    }

    public String getId(){
        return this.id;
    }

    /**
     * Records id, function name and parameter of a preprocess statement.
     *
     * @param aPreprocessId the id of the preprocessing
     * @param aFunction the function name
     * @param aParameter the parameter including its enclosing quote characters; a non-empty
     *        value is stored without its first and last character, an empty value is ignored
     */
    public void preprocess(String aPreprocessId, String aFunction, String aParameter) {
        this.id = aPreprocessId;
        this.funcName = aFunction;
        if (!aParameter.isEmpty()) {
            // unquote
            final int lastIndex = aParameter.length() - 1;
            this.parameter = aParameter.substring(1, lastIndex);
        }
    }

    /**
     * Records function name and parameter of a preprocess statement.
     *
     * @param aFunction the function name
     * @param aParameter the parameter including its enclosing quote characters; a non-empty
     *        value is stored without its first and last character, an empty value is ignored
     */
    public void preprocess(String aFunction, String aParameter){
        this.funcName = aFunction;
        if (!aParameter.isEmpty()) {
            // unquote
            final int lastIndex = aParameter.length() - 1;
            this.parameter = aParameter.substring(1, lastIndex);
        }
    }

    /**
     * Records only the function name of a preprocess statement.
     *
     * @param aFunction the function name
     */
    public void preprocess(String aFunction) {
        this.funcName = aFunction;
    }
}
/* JavaCC - OriginalChecksum=b9229360af18a53de1ce87664846e442 (do not edit this line) */

View File

@ -0,0 +1,48 @@
/* Generated By:JJTree: Do not edit this line. ASTMyScript.java Version 4.3 */
/* JavaCCOptions:MULTI=true,NODE_USES_PARSER=false,VISITOR=true,TRACK_TOKENS=false,NODE_PREFIX=AST,NODE_EXTENDS=,NODE_FACTORY=,SUPPORT_CLASS_VISIBILITY_PUBLIC=true */
package eu.dnetlib.data.collective.transformation.rulelanguage.parser;
/**
 * Syntax tree node for the script declaration; stores the script name and whether the
 * script is a main script or a subscript.
 */
public class ASTMyScript extends SimpleNode {

    /** The two kinds of scripts that can be declared. */
    public enum SCRIPTTYPE { MAINSCRIPT, SUBSCRIPT }

    private String scriptName = "";
    private SCRIPTTYPE scriptType;

    public ASTMyScript(int id) {
        super(id);
    }

    public ASTMyScript(FtScript p, int id) {
        super(p, id);
    }

    /** Accept the visitor. **/
    public Object jjtAccept(FtScriptVisitor visitor, Object data) {
        return visitor.visit(this, data);
    }

    /**
     * Stores the script name without its quotes: the text between the first and the last
     * double-quote character of the argument is kept.
     *
     * @param scriptName the script name enclosed in double quotes
     */
    public void setScript(String scriptName) {
        final int begin = scriptName.indexOf("\"") + 1;
        final int end = scriptName.lastIndexOf("\"");
        this.scriptName = scriptName.substring(begin, end);
    }

    /**
     * @return the unquoted script name, empty if none was set
     */
    public String getScript() {
        return this.scriptName;
    }

    /**
     * @param scriptType the scriptType to set
     */
    public void setScriptType(SCRIPTTYPE scriptType) {
        this.scriptType = scriptType;
    }

    /**
     * @return the scriptType
     */
    public SCRIPTTYPE getScriptType() {
        return this.scriptType;
    }
}
/* JavaCC - OriginalChecksum=c9a44759b6c7b4b163c6b10f67226e91 (do not edit this line) */

View File

@ -0,0 +1,68 @@
/* Generated By:JJTree: Do not edit this line. ASTMySet.java Version 4.3 */
/* JavaCCOptions:MULTI=true,NODE_USES_PARSER=false,VISITOR=true,TRACK_TOKENS=false,NODE_PREFIX=AST,NODE_EXTENDS=,NODE_FACTORY=,SUPPORT_CLASS_VISIBILITY_PUBLIC=true */
package eu.dnetlib.data.collective.transformation.rulelanguage.parser;
import java.util.LinkedList;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import eu.dnetlib.data.collective.transformation.rulelanguage.Rules;
/**
 * This class is primarily designed to assign values to the element and attributes of the element
 * <p>
 * It collects the attribute rules of a set statement and optionally a value expression.
 *
 * @author jochen
 */
public class ASTMySet extends AbstractNode {

    private static final Log log = LogFactory.getLog(ASTMySet.class);

    private List<Rules> rulesList = new LinkedList<Rules>();
    private String expressionValue;
    // written by setAttribute; no reader is visible in this class
    private String constValue;
    private boolean isExpressionValue = false;

    public ASTMySet(int id) {
        super(id);
    }

    public ASTMySet(FtScript p, int id) {
        super(p, id);
    }

    /** Accept the visitor. **/
    public Object jjtAccept(FtScriptVisitor visitor, Object data) {
        return visitor.visit(this, data);
    }

    /**
     * Appends a rule to the list of rules of this set statement.
     *
     * @param aRule the attribute rule to add
     */
    public void addAttributeRule(Rules aRule){
        final String message = "called method: ASTMySet.addRule " + aRule.getUniqueName()
                + " attribute: " + aRule.getAttribute()
                + " defines variable?: " + aRule.definesVariable();
        log.debug(message);
        this.rulesList.add(aRule);
    }

    /**
     * @return the rules added so far, in insertion order
     */
    public List<Rules> getRules(){
        return rulesList;
    }

    /**
     * Stores the value expression and flags this node as carrying one.
     *
     * @param aExpression the value expression
     */
    public void setValueExpression(String aExpression) {
        log.debug("expression: " + aExpression);
        this.isExpressionValue = true;
        this.expressionValue = aExpression;
    }

    public String getValueExpression(){
        return expressionValue;
    }

    public boolean isValueExpression(){
        return isExpressionValue;
    }

    /**
     * @param aValue the constant value to store
     */
    public void setAttribute(String aValue) {
        constValue = aValue;
    }
}
/* JavaCC - OriginalChecksum=1a796456845c74ed0ee62389483ce5a7 (do not edit this line) */

View File

@ -0,0 +1,29 @@
/* Generated By:JJTree: Do not edit this line. ASTMySkip.java Version 4.3 */
/* JavaCCOptions:MULTI=true,NODE_USES_PARSER=false,VISITOR=true,TRACK_TOKENS=false,NODE_PREFIX=AST,NODE_EXTENDS=,NODE_FACTORY=,SUPPORT_CLASS_VISIBILITY_PUBLIC=true */
package eu.dnetlib.data.collective.transformation.rulelanguage.parser;
import eu.dnetlib.data.collective.transformation.rulelanguage.Rules;
/**
 * Syntax tree node for the skip-record statement. It carries no state of its own.
 */
public class ASTMySkip extends AbstractNode {

    public ASTMySkip(int id) {
        super(id);
    }

    public ASTMySkip(FtScript p, int id) {
        super(p, id);
    }

    /** Accept the visitor. **/
    public Object jjtAccept(FtScriptVisitor visitor, Object data) {
        return visitor.visit(this, data);
    }

    /**
     * Hook for the skip-record statement; currently does nothing.
     */
    public void skipRecord() {
        // intentionally empty
    }
}
/* JavaCC - OriginalChecksum=149f1dfbf543fde562b05c417044a68a (do not edit this line) */

View File

@ -0,0 +1,21 @@
/* Generated By:JJTree: Do not edit this line. ASTStart.java Version 4.3 */
/* JavaCCOptions:MULTI=true,NODE_USES_PARSER=false,VISITOR=true,TRACK_TOKENS=false,NODE_PREFIX=AST,NODE_EXTENDS=,NODE_FACTORY=,SUPPORT_CLASS_VISIBILITY_PUBLIC=true */
package eu.dnetlib.data.collective.transformation.rulelanguage.parser;
/**
 * Syntax tree node returned by the parser's start production; it adds nothing but the
 * visitor dispatch.
 */
public class ASTStart extends SimpleNode {

    public ASTStart(int id) {
        super(id);
    }

    public ASTStart(FtScript p, int id) {
        super(p, id);
    }

    /** Accept the visitor. **/
    public Object jjtAccept(FtScriptVisitor visitor, Object data) {
        return visitor.visit(this, data);
    }
}
/* JavaCC - OriginalChecksum=97710dc79a4caf565e6b2feba3f4fd69 (do not edit this line) */

View File

@ -0,0 +1,42 @@
/**
*
*/
package eu.dnetlib.data.collective.transformation.rulelanguage.parser;
import eu.dnetlib.data.collective.transformation.rulelanguage.Rules;
/**
 * Base class for AST nodes that carry a {@link Rules} object in addition to
 * the tree machinery inherited from {@link SimpleNode}.
 *
 * @author jochen
 */
public abstract class AbstractNode extends SimpleNode {

    /** Rule attached to this node; not initialised by the constructors. */
    Rules rule;

    /**
     * Creates a node with the given node id.
     *
     * @param i node id, passed to the superclass
     */
    public AbstractNode(int i) {
        super(i);
    }

    /**
     * Creates a node with the given node id that is bound to a parser.
     *
     * @param p the parser, passed to the superclass
     * @param i node id, passed to the superclass
     */
    public AbstractNode(FtScript p, int i) {
        super(p, i);
    }

    /**
     * @return the rule currently attached to this node (may be {@code null})
     */
    public Rules getRule() {
        return rule;
    }

    /**
     * Attaches a rule to this node, replacing any previous one.
     *
     * @param rule the rule to attach
     */
    public void setRule(Rules rule) {
        this.rule = rule;
    }
}

View File

@ -0,0 +1,217 @@
/* Generated By:JJTree&JavaCC: Do not edit this line. FtScriptConstants.java */
package eu.dnetlib.data.collective.transformation.rulelanguage.parser;
/**
* Token literal values and constants.
* Generated by org.javacc.parser.OtherFilesGen#start()
*
* Each int constant below is a token kind; its value is the index of the
* matching entry in {@link #tokenImage}. Kinds 1 to 7 have entries in
* {@link #tokenImage} but no named constant in this interface.
*/
public interface FtScriptConstants {
/** End of File. */
int EOF = 0;
/** Token kind shown as {@code <SINGLE_LINE_COMMENT>} in tokenImage. */
int SINGLE_LINE_COMMENT = 8;
/** Token kind for the literal {@code import}. */
int IMPORT = 9;
/** Token kind for the literal {@code trans}. */
int TRANS = 10;
/** Token kind for the literal {@code if}. */
int IF = 11;
/** Token kind for the literal {@code else}. */
int ELSE = 12;
/** Token kind for the literal {@code end}. */
int END = 13;
/** Token kind for the literal {@code Extract}. */
int EXTRACT = 14;
/** Token kind for the literal {@code identifierExtract}. */
int IDENTIFIEREXTRACT = 15;
/** Token kind for the literal {@code Convert}. */
int CONVERT = 16;
/** Token kind for the literal {@code RegExpr}. */
int REGEXPR = 17;
/** Token kind for the literal {@code getValue}. */
int GETVALUE = 18;
/** Token kind for the literal {@code copy}. */
int COPY = 19;
/** Token kind for the literal {@code declare_ns}. */
int DECLARE_NAMESPACE = 20;
/** Token kind for the literal {@code declare_script}. */
int DECLARE_SCRIPT = 21;
/** Token kind for the literal {@code declare_subscript}. */
int DECLARE_SUBSCRIPT = 22;
/** Token kind for the literal {@code preprocess}. */
int PREPROCESS = 23;
/** Token kind for the literal {@code set}. */
int SET = 24;
/** Token kind for the literal {@code skipRecord}. */
int SKIPRECORD = 25;
/** Token kind for the literal {@code split}. */
int SPLIT = 26;
/** Token kind for the literal {@code static}. */
int STATIC = 27;
/** Token kind for the literal {@code xpath:}. */
int XPATH_SCHEME = 28;
/** Token kind for the literal {@code apply}. */
int APPLY = 29;
/** Token kind for the literal {@code empty}. */
int EMPTY = 30;
/** Token kind for the literal {@code dblookup}. */
int DBLOOKUP = 31;
/** Token kind for the literal {@code blacklist}. */
int BLACKLIST = 32;
/** Token kind for the literal {@code lookup}. */
int LOOKUP = 33;
/** Token kind for the literal {@code concat}. */
int CONCAT = 34;
/** Token kind for the literal {@code ;}. */
int SEMICOLON = 35;
/** Token kind for the literal {@code ]}. */
int RBRACKET = 36;
/** Token kind for the opening curly brace literal. */
int LBRACE = 37;
/** Token kind for the closing curly brace literal. */
int RBRACE = 38;
/** Token kind for the literal {@code ,}. */
int COMMA = 39;
/** Token kind for the literal {@code )}. */
int RPAREN = 40;
/** Token kind for the single-quote character literal. */
int QUOTE = 41;
/** Token kind for the literal {@code =}. */
int EQUAL = 42;
/** Token kind for the literal {@code !=}. */
int NOTEQUAL = 43;
/** Token kind shown as {@code <GT>} in tokenImage. */
int GT = 44;
/** Token kind shown as {@code <LT>} in tokenImage. */
int LT = 45;
/** Token kind shown as {@code <GTE>} in tokenImage. */
int GTE = 46;
/** Token kind shown as {@code <LTE>} in tokenImage. */
int LTE = 47;
/** Token kind for the literal {@code +}. */
int PLUS = 48;
/** Token kind for the literal {@code |}. */
int VBAR = 49;
/** Token kind for the single forward-slash literal. */
int SLASH = 50;
/** Token kind for the literal {@code ..}. */
int DOT_DOT = 51;
/** Token kind for the at-sign literal. */
int AT = 52;
/** Token kind for the literal {@code (}. */
int LPAREN = 53;
/** Token kind for the literal {@code [}. */
int LBRACKET = 54;
/** Token kind shown as {@code <DOLLAR_QNAME>} in tokenImage. */
int DOLLAR_QNAME = 55;
/** Token kind shown as {@code <PERCENT_QNAME>} in tokenImage. */
int PERCENT_QNAME = 56;
/** Token kind shown as {@code <XPATH>} in tokenImage. */
int XPATH = 57;
/** Token kind shown as {@code <JOBCONST>} in tokenImage. */
int JOBCONST = 58;
/** Token kind for the literal {@code $job.}. */
int JOBCONST_PREFIX = 59;
/** Token kind shown as {@code <QUOTED_STRING>} in tokenImage. */
int QUOTED_STRING = 60;
/** Token kind shown as {@code <SINGLE_QUOTE>} in tokenImage. */
int SINGLE_QUOTE = 61;
/** Token kind shown as {@code <CHARACTER_LITERAL>} in tokenImage. */
int CHARACTER_LITERAL = 62;
/** Token kind shown as {@code <STRING_LITERAL>} in tokenImage. */
int STRING_LITERAL = 63;
/** Token kind shown as {@code <URI>} in tokenImage. */
int URI = 64;
/** Token kind shown as {@code <IDENTIFIER>} in tokenImage. */
int IDENTIFIER = 65;
/** Token kind shown as {@code <LETTER_OR_DIGIT>} in tokenImage. */
int LETTER_OR_DIGIT = 66;
/** Token kind for the literal {@code -}. */
int MINUS = 67;
/** Token kind for the literal {@code .}. */
int DOT = 68;
/** Token kind for the literal {@code :}. */
int COLON = 69;
/** Lexical state. */
int DEFAULT = 0;
/** Lexical state. */
int IN_SINGLE_LINE_COMMENT = 1;
/**
* Literal token values, indexed by token kind (70 entries, kinds 0 to 69).
* Entries 1 to 7 (space, tab, newline, carriage return, two unnamed kinds
* and the double-slash literal) have no named constant above.
*/
String[] tokenImage = {
"<EOF>",
"\" \"",
"\"\\t\"",
"\"\\n\"",
"\"\\r\"",
"<token of kind 5>",
"<token of kind 6>",
"\"//\"",
"<SINGLE_LINE_COMMENT>",
"\"import\"",
"\"trans\"",
"\"if\"",
"\"else\"",
"\"end\"",
"\"Extract\"",
"\"identifierExtract\"",
"\"Convert\"",
"\"RegExpr\"",
"\"getValue\"",
"\"copy\"",
"\"declare_ns\"",
"\"declare_script\"",
"\"declare_subscript\"",
"\"preprocess\"",
"\"set\"",
"\"skipRecord\"",
"\"split\"",
"\"static\"",
"\"xpath:\"",
"\"apply\"",
"\"empty\"",
"\"dblookup\"",
"\"blacklist\"",
"\"lookup\"",
"\"concat\"",
"\";\"",
"\"]\"",
"\"{\"",
"\"}\"",
"\",\"",
"\")\"",
"\"\\\'\"",
"\"=\"",
"\"!=\"",
"<GT>",
"<LT>",
"<GTE>",
"<LTE>",
"\"+\"",
"\"|\"",
"\"/\"",
"\"..\"",
"\"@\"",
"\"(\"",
"\"[\"",
"<DOLLAR_QNAME>",
"<PERCENT_QNAME>",
"<XPATH>",
"<JOBCONST>",
"\"$job.\"",
"<QUOTED_STRING>",
"<SINGLE_QUOTE>",
"<CHARACTER_LITERAL>",
"<STRING_LITERAL>",
"<URI>",
"<IDENTIFIER>",
"<LETTER_OR_DIGIT>",
"\"-\"",
"\".\"",
"\":\"",
};
}

View File

@ -0,0 +1,39 @@
/* Generated By:JavaCC: Do not edit this line. FtScriptTreeConstants.java Version 5.0 */
package eu.dnetlib.data.collective.transformation.rulelanguage.parser;
/**
 * Node ids used by the JJTree-generated AST classes, together with the
 * display name of each node type. The value of every {@code JJT...} constant
 * is the index of its name in {@link #jjtNodeName}.
 */
public interface FtScriptTreeConstants {

    // Interface fields are implicitly public, static and final.
    int JJTSTART = 0;
    int JJTMYASSIGN = 1;
    int JJTVOID = 2;
    int JJTMYATTRIBUTE = 3;
    int JJTMYCONDITION = 4;
    int JJTMYEMPTY = 5;
    int JJTMYIMPORT = 6;
    int JJTMYNS = 7;
    int JJTMYPREPROCESS = 8;
    int JJTMYSET = 9;
    int JJTMYSKIP = 10;
    int JJTMYCOPY = 11;
    int JJTMYOP = 12;
    int JJTMYSCRIPT = 13;

    /** Node type names, indexed by node id. */
    String[] jjtNodeName = {
        "Start",
        "MyAssign",
        "void",
        "MyAttribute",
        "MyCondition",
        "MyEmpty",
        "MyImport",
        "MyNs",
        "MyPreprocess",
        "MySet",
        "MySkip",
        "MyCopy",
        "MyOp",
        "MyScript",
    };
}
/* JavaCC - OriginalChecksum=c8056b53459b9b66f3a28fd32fead01c (do not edit this line) */

View File

@ -0,0 +1,21 @@
/* Generated By:JavaCC: Do not edit this line. FtScriptVisitor.java Version 5.0 */
package eu.dnetlib.data.collective.transformation.rulelanguage.parser;
/**
 * Visitor over the AST node classes of the FtScript grammar. There is one
 * {@code visit} overload per node class plus one for {@link SimpleNode}.
 * Every overload receives the node and an opaque {@code data} argument and
 * returns an arbitrary result object.
 */
public interface FtScriptVisitor {

    // Interface methods are implicitly public and abstract.
    Object visit(SimpleNode node, Object data);

    Object visit(ASTStart node, Object data);

    Object visit(ASTMyAssign node, Object data);

    Object visit(ASTMyAttribute node, Object data);

    Object visit(ASTMyCondition node, Object data);

    Object visit(ASTMyEmpty node, Object data);

    Object visit(ASTMyImport node, Object data);

    Object visit(ASTMyNs node, Object data);

    Object visit(ASTMyPreprocess node, Object data);

    Object visit(ASTMySet node, Object data);

    Object visit(ASTMySkip node, Object data);

    Object visit(ASTMyCopy node, Object data);

    Object visit(ASTMyOp node, Object data);

    Object visit(ASTMyScript node, Object data);
}
/* JavaCC - OriginalChecksum=878b8e974e60d303d7be3d6ce91428ec (do not edit this line) */

View File

@ -0,0 +1,123 @@
/* Generated By:JavaCC: Do not edit this line. JJTFtScriptState.java Version 5.0 */
package eu.dnetlib.data.collective.transformation.rulelanguage.parser;
/**
 * Node stack used while the AST is being built. Nodes are pushed as they are
 * completed; "marks" remember where each open node scope starts on the stack
 * so that the nodes pushed since then can become children of the scope's node.
 */
public class JJTFtScriptState {

    /** The node stack itself; the last element is the top. */
    private final java.util.List<Node> nodes = new java.util.ArrayList<Node>();

    /** Saved marks of the enclosing node scopes. */
    private final java.util.List<Integer> marks = new java.util.ArrayList<Integer>();

    /** Number of nodes on the stack. */
    private int sp = 0;

    /** Current mark: the stack size at which the current node scope was opened. */
    private int mk = 0;

    /** Outcome of the most recent closeNodeScope call. */
    private boolean node_created;

    public JJTFtScriptState() {
        // all state is set up by the field initialisers
    }

    /**
     * Determines whether the current node was actually closed and pushed.
     * This should only be called in the final user action of a node scope.
     */
    public boolean nodeCreated() {
        return node_created;
    }

    /**
     * Call this to reinitialize the node stack. It is called automatically
     * by the parser's ReInit() method.
     */
    public void reset() {
        nodes.clear();
        marks.clear();
        sp = 0;
        mk = 0;
    }

    /**
     * Returns the root node of the AST. It only makes sense to call this
     * after a successful parse.
     */
    public Node rootNode() {
        return nodes.get(0);
    }

    /** Pushes a node on to the stack. */
    public void pushNode(Node n) {
        nodes.add(n);
        sp++;
    }

    /** Returns the node on the top of the stack, and removes it from the stack. */
    public Node popNode() {
        sp--;
        if (sp < mk) {
            // popped below the current scope: fall back to the enclosing scope's mark
            mk = popMark();
        }
        return nodes.remove(nodes.size() - 1);
    }

    /** Returns the node currently on the top of the stack. */
    public Node peekNode() {
        return nodes.get(nodes.size() - 1);
    }

    /** Returns the number of children on the stack in the current node scope. */
    public int nodeArity() {
        return sp - mk;
    }

    /** Discards every node pushed in the current scope and leaves the scope. */
    public void clearNodeScope(Node n) {
        while (sp > mk) {
            popNode();
        }
        mk = popMark();
    }

    /** Opens a new node scope for {@code n}, remembering the current mark. */
    public void openNodeScope(Node n) {
        marks.add(mk);
        mk = sp;
        n.jjtOpen();
    }

    /**
     * A definite node is constructed from a specified number of children.
     * That number of nodes are popped from the stack and made the children
     * of the definite node. Then the definite node is pushed on to the stack.
     */
    public void closeNodeScope(Node n, int num) {
        mk = popMark();
        attachChildren(n, num);
        n.jjtClose();
        pushNode(n);
        node_created = true;
    }

    /**
     * A conditional node is constructed if its condition is true. All the
     * nodes that have been pushed since the node was opened are made children
     * of the conditional node, which is then pushed on to the stack. If the
     * condition is false the node is not constructed and they are left on the
     * stack.
     */
    public void closeNodeScope(Node n, boolean condition) {
        if (!condition) {
            mk = popMark();
            node_created = false;
            return;
        }
        // the arity must be read before the mark is restored
        int arity = nodeArity();
        mk = popMark();
        attachChildren(n, arity);
        n.jjtClose();
        pushNode(n);
        node_created = true;
    }

    /** Removes and returns the most recently saved mark. */
    private int popMark() {
        return marks.remove(marks.size() - 1);
    }

    /**
     * Pops {@code count} nodes and registers them as children of
     * {@code parent}; the first node popped gets the highest child index.
     */
    private void attachChildren(Node parent, int count) {
        for (int index = count - 1; index >= 0; index--) {
            Node child = popNode();
            child.jjtSetParent(parent);
            parent.jjtAddChild(child, index);
        }
    }
}
/* JavaCC - OriginalChecksum=ecb473ab5135001f535b0946f205a35d (do not edit this line) */

View File

@ -0,0 +1,39 @@
/* Generated By:JJTree: Do not edit this line. Node.java Version 4.3 */
/* JavaCCOptions:MULTI=true,NODE_USES_PARSER=false,VISITOR=true,TRACK_TOKENS=false,NODE_PREFIX=AST,NODE_EXTENDS=,NODE_FACTORY=,SUPPORT_CLASS_VISIBILITY_PUBLIC=true */
package eu.dnetlib.data.collective.transformation.rulelanguage.parser;
/**
 * All AST nodes must implement this interface. It provides basic machinery
 * for constructing the parent and child relationships between nodes.
 */
public interface Node {

    /**
     * Called after the node has been made the current node. It indicates
     * that child nodes can now be added to it.
     */
    void jjtOpen();

    /** Called after all the child nodes have been added. */
    void jjtClose();

    /** Informs the node of its parent. */
    void jjtSetParent(Node n);

    /** Returns the parent previously set with {@link #jjtSetParent(Node)}. */
    Node jjtGetParent();

    /** Tells the node to add {@code n} to its list of children at index {@code i}. */
    void jjtAddChild(Node n, int i);

    /** Returns a child node. The children are numbered from zero, left to right. */
    Node jjtGetChild(int i);

    /** Returns the number of children the node has. */
    int jjtGetNumChildren();

    /** Accepts the visitor. */
    Object jjtAccept(FtScriptVisitor visitor, Object data);
}
/* JavaCC - OriginalChecksum=a824598ce2b865e1e8afb12061994660 (do not edit this line) */

View File

@ -0,0 +1,187 @@
/* Generated By:JavaCC: Do not edit this line. ParseException.java Version 5.0 */
/* JavaCCOptions:KEEP_LINE_COL=null */
package eu.dnetlib.data.collective.transformation.rulelanguage.parser;
/**
 * This exception is thrown when parse errors are encountered. You can
 * explicitly create objects of this exception type by calling the method
 * generateParseException in the generated parser.
 *
 * You can modify this class to customize your error reporting mechanisms so
 * long as you retain the public fields.
 */
public class ParseException extends Exception {

    /**
     * The version identifier for this Serializable class. Increment only if
     * the <i>serialized</i> form of the class changes.
     */
    private static final long serialVersionUID = 1L;

    /**
     * This is the last token that has been consumed successfully. If this
     * object has been created due to a parse error, the token following this
     * token will (therefore) be the first error token.
     */
    public Token currentToken;

    /**
     * Each entry in this array is an array of integers. Each array of
     * integers represents a sequence of tokens (by their ordinal values)
     * that is expected at this point of the parse.
     */
    public int[][] expectedTokenSequences;

    /**
     * This is a reference to the "tokenImage" array of the generated parser
     * within which the parse error occurred. This array is defined in the
     * generated ...Constants interface.
     */
    public String[] tokenImage;

    /** The end of line string for this machine. */
    protected String eol = System.getProperty("line.separator", "\n");

    /**
     * This constructor is used by the method "generateParseException" in the
     * generated parser. It builds the exception message from the arguments
     * and stores them in the fields "currentToken", "expectedTokenSequences"
     * and "tokenImage".
     */
    public ParseException(Token currentTokenVal,
                          int[][] expectedTokenSequencesVal,
                          String[] tokenImageVal) {
        super(initialise(currentTokenVal, expectedTokenSequencesVal, tokenImageVal));
        this.currentToken = currentTokenVal;
        this.expectedTokenSequences = expectedTokenSequencesVal;
        this.tokenImage = tokenImageVal;
    }

    /**
     * Creates an exception without a message. The fields "currentToken",
     * "expectedTokenSequences" and "tokenImage" are left unset. The JavaCC
     * generated code does not use this constructor.
     */
    public ParseException() {
        super();
    }

    /** Constructor with message; the public fields are left unset. */
    public ParseException(String message) {
        super(message);
    }

    /**
     * Builds the parse error message from the last good token, the expected
     * token sequences and the token image table.
     *
     * @param currentToken           last successfully consumed token; its
     *                               {@code next} chain holds the offending tokens
     * @param expectedTokenSequences token kind sequences that would have been accepted
     * @param tokenImage             printable image per token kind
     * @return the complete message text
     */
    private static String initialise(Token currentToken,
                                     int[][] expectedTokenSequences,
                                     String[] tokenImage) {
        final String lineBreak = System.getProperty("line.separator", "\n");

        // One line per expected sequence; remember the longest sequence so we
        // know how many offending tokens to show.
        StringBuilder expected = new StringBuilder();
        int longest = 0;
        for (int[] sequence : expectedTokenSequences) {
            longest = Math.max(longest, sequence.length);
            for (int kind : sequence) {
                expected.append(tokenImage[kind]).append(' ');
            }
            if (sequence[sequence.length - 1] != 0) {
                expected.append("...");
            }
            expected.append(lineBreak).append(" ");
        }

        StringBuilder message = new StringBuilder("Encountered \"");
        Token tok = currentToken.next;
        for (int shown = 0; shown < longest; shown++) {
            if (shown != 0) {
                message.append(" ");
            }
            if (tok.kind == 0) {
                // kind 0 is end of file: nothing follows it
                message.append(tokenImage[0]);
                break;
            }
            message.append(" ").append(tokenImage[tok.kind]);
            message.append(" \"");
            message.append(add_escapes(tok.image));
            message.append(" \"");
            tok = tok.next;
        }
        message.append("\" at line ").append(currentToken.next.beginLine)
               .append(", column ").append(currentToken.next.beginColumn);
        message.append(".").append(lineBreak);

        if (expectedTokenSequences.length == 1) {
            message.append("Was expecting:").append(lineBreak).append(" ");
        } else {
            message.append("Was expecting one of:").append(lineBreak).append(" ");
        }
        message.append(expected.toString());
        return message.toString();
    }

    /**
     * Used to convert raw characters to their escaped version when these raw
     * versions cannot be used as part of an ASCII string literal. NUL
     * characters are dropped; characters outside 0x20..0x7e become a
     * four-digit hexadecimal unicode escape.
     */
    static String add_escapes(String str) {
        StringBuilder escaped = new StringBuilder();
        for (int pos = 0; pos < str.length(); pos++) {
            final char ch = str.charAt(pos);
            switch (ch) {
                case 0:
                    break;
                case '\b':
                    escaped.append("\\b");
                    break;
                case '\t':
                    escaped.append("\\t");
                    break;
                case '\n':
                    escaped.append("\\n");
                    break;
                case '\f':
                    escaped.append("\\f");
                    break;
                case '\r':
                    escaped.append("\\r");
                    break;
                case '\"':
                    escaped.append("\\\"");
                    break;
                case '\'':
                    escaped.append("\\\'");
                    break;
                case '\\':
                    escaped.append("\\\\");
                    break;
                default:
                    if (ch < 0x20 || ch > 0x7e) {
                        String hex = "0000" + Integer.toString(ch, 16);
                        escaped.append("\\u").append(hex.substring(hex.length() - 4));
                    } else {
                        escaped.append(ch);
                    }
                    break;
            }
        }
        return escaped.toString();
    }
}
/* JavaCC - OriginalChecksum=5442a73da71e919193d4dba049348359 (do not edit this line) */

View File

@ -0,0 +1,471 @@
/* Generated By:JavaCC: Do not edit this line. SimpleCharStream.java Version 5.0 */
/* JavaCCOptions:STATIC=false,SUPPORT_CLASS_VISIBILITY_PUBLIC=true */
package eu.dnetlib.data.collective.transformation.rulelanguage.parser;
/**
 * An implementation of interface CharStream, where the stream is assumed to
 * contain only ASCII characters (without unicode processing).
 *
 * Characters are read from a {@link java.io.Reader} into a circular buffer.
 * For every buffered character the line and column at which it was read are
 * recorded, so that begin/end positions of the current token can be reported.
 */
public class SimpleCharStream {

    /** Buffer size used by the constructors and ReInit overloads that take none. */
    private static final int DEFAULT_BUFFER_SIZE = 4096;

    /** Number of slots added on each buffer expansion; also the threshold used in FillBuff. */
    private static final int BUFFER_INCREMENT = 2048;

    /** Whether parser is static. */
    public static final boolean staticFlag = false;

    /** Current capacity of {@link #buffer}, {@link #bufline} and {@link #bufcolumn}. */
    int bufsize;

    /** Upper bound (exclusive) of the buffer region that may currently be filled. */
    int available;

    /** Buffer index of the first character of the current token; -1 while a token is being started. */
    int tokenBegin;

    /** Position in buffer. */
    public int bufpos = -1;

    /** Line number recorded for each buffered character. */
    protected int bufline[];

    /** Column number recorded for each buffered character. */
    protected int bufcolumn[];

    protected int column = 0;
    protected int line = 1;

    protected boolean prevCharIsCR = false;
    protected boolean prevCharIsLF = false;

    protected java.io.Reader inputStream;

    protected char[] buffer;

    /** Index one past the last character read from the reader into the buffer. */
    protected int maxNextCharInd = 0;

    /** Number of characters that were backed up and must be re-delivered from the buffer. */
    protected int inBuf = 0;

    protected int tabSize = 8;

    protected void setTabSize(int i) {
        tabSize = i;
    }

    /**
     * Returns the tab size.
     *
     * @param i ignored; kept because it is part of the existing signature
     */
    protected int getTabSize(int i) {
        return tabSize;
    }

    /**
     * Grows the three buffers by {@link #BUFFER_INCREMENT} slots and moves
     * the current token to the start of the new buffers.
     *
     * @param wrapAround {@code true} when the token data wraps around the end
     *                   of the old buffer and continues at index 0
     */
    protected void ExpandBuff(boolean wrapAround) {
        char[] newbuffer = new char[bufsize + BUFFER_INCREMENT];
        int newbufline[] = new int[bufsize + BUFFER_INCREMENT];
        int newbufcolumn[] = new int[bufsize + BUFFER_INCREMENT];

        try {
            if (wrapAround) {
                System.arraycopy(buffer, tokenBegin, newbuffer, 0, bufsize - tokenBegin);
                System.arraycopy(buffer, 0, newbuffer, bufsize - tokenBegin, bufpos);
                buffer = newbuffer;

                System.arraycopy(bufline, tokenBegin, newbufline, 0, bufsize - tokenBegin);
                System.arraycopy(bufline, 0, newbufline, bufsize - tokenBegin, bufpos);
                bufline = newbufline;

                System.arraycopy(bufcolumn, tokenBegin, newbufcolumn, 0, bufsize - tokenBegin);
                System.arraycopy(bufcolumn, 0, newbufcolumn, bufsize - tokenBegin, bufpos);
                bufcolumn = newbufcolumn;

                maxNextCharInd = (bufpos += (bufsize - tokenBegin));
            } else {
                System.arraycopy(buffer, tokenBegin, newbuffer, 0, bufsize - tokenBegin);
                buffer = newbuffer;

                System.arraycopy(bufline, tokenBegin, newbufline, 0, bufsize - tokenBegin);
                bufline = newbufline;

                System.arraycopy(bufcolumn, tokenBegin, newbufcolumn, 0, bufsize - tokenBegin);
                bufcolumn = newbufcolumn;

                maxNextCharInd = (bufpos -= tokenBegin);
            }
        } catch (Throwable t) {
            // Keep the original failure as the cause so its type and stack trace are not lost.
            throw new Error(t.getMessage(), t);
        }

        bufsize += BUFFER_INCREMENT;
        available = bufsize;
        tokenBegin = 0;
    }

    /**
     * Reads more characters from the reader into the buffer, first making
     * room (by wrapping around or expanding) when the fillable region is
     * exhausted.
     *
     * @throws java.io.IOException if the reader fails or has no more input;
     *         on end of input the reader is closed before the exception is thrown
     */
    protected void FillBuff() throws java.io.IOException {
        if (maxNextCharInd == available) {
            if (available == bufsize) {
                if (tokenBegin > BUFFER_INCREMENT) {
                    // enough free space before the token: wrap around to index 0
                    bufpos = maxNextCharInd = 0;
                    available = tokenBegin;
                } else if (tokenBegin < 0) {
                    bufpos = maxNextCharInd = 0;
                } else {
                    ExpandBuff(false);
                }
            } else if (available > tokenBegin) {
                available = bufsize;
            } else if ((tokenBegin - available) < BUFFER_INCREMENT) {
                ExpandBuff(true);
            } else {
                available = tokenBegin;
            }
        }

        int i;
        try {
            if ((i = inputStream.read(buffer, maxNextCharInd, available - maxNextCharInd)) == -1) {
                inputStream.close();
                throw new java.io.IOException();
            } else {
                maxNextCharInd += i;
            }
        } catch (java.io.IOException e) {
            // undo the position advance done by readChar before reporting the failure
            --bufpos;
            backup(0);
            if (tokenBegin == -1) {
                tokenBegin = bufpos;
            }
            throw e;
        }
    }

    /** Start. Reads the first character of a new token and marks its buffer position. */
    public char BeginToken() throws java.io.IOException {
        tokenBegin = -1;
        char c = readChar();
        tokenBegin = bufpos;

        return c;
    }

    /**
     * Advances the line/column counters for character {@code c} and records
     * the resulting position for the current buffer slot.
     */
    protected void UpdateLineColumn(char c) {
        column++;

        if (prevCharIsLF) {
            prevCharIsLF = false;
            line += (column = 1);
        } else if (prevCharIsCR) {
            prevCharIsCR = false;
            if (c == '\n') {
                // CR LF pair: the line advances after the LF, not after the CR
                prevCharIsLF = true;
            } else {
                line += (column = 1);
            }
        }

        switch (c) {
            case '\r':
                prevCharIsCR = true;
                break;
            case '\n':
                prevCharIsLF = true;
                break;
            case '\t':
                column--;
                column += (tabSize - (column % tabSize));
                break;
            default:
                break;
        }

        bufline[bufpos] = line;
        bufcolumn[bufpos] = column;
    }

    /** Read a character. */
    public char readChar() throws java.io.IOException {
        if (inBuf > 0) {
            // re-deliver a backed-up character; its position was recorded already
            --inBuf;

            if (++bufpos == bufsize) {
                bufpos = 0;
            }

            return buffer[bufpos];
        }

        if (++bufpos >= maxNextCharInd) {
            FillBuff();
        }

        char c = buffer[bufpos];

        UpdateLineColumn(c);
        return c;
    }

    /**
     * @deprecated
     * @see #getEndColumn
     */
    @Deprecated
    public int getColumn() {
        return bufcolumn[bufpos];
    }

    /**
     * @deprecated
     * @see #getEndLine
     */
    @Deprecated
    public int getLine() {
        return bufline[bufpos];
    }

    /** Get token end column number. */
    public int getEndColumn() {
        return bufcolumn[bufpos];
    }

    /** Get token end line number. */
    public int getEndLine() {
        return bufline[bufpos];
    }

    /** Get token beginning column number. */
    public int getBeginColumn() {
        return bufcolumn[tokenBegin];
    }

    /** Get token beginning line number. */
    public int getBeginLine() {
        return bufline[tokenBegin];
    }

    /** Backup a number of characters. */
    public void backup(int amount) {
        inBuf += amount;
        if ((bufpos -= amount) < 0) {
            bufpos += bufsize;
        }
    }

    /** Constructor. */
    public SimpleCharStream(java.io.Reader dstream, int startline,
                            int startcolumn, int buffersize) {
        inputStream = dstream;
        line = startline;
        column = startcolumn - 1;

        available = bufsize = buffersize;
        buffer = new char[buffersize];
        bufline = new int[buffersize];
        bufcolumn = new int[buffersize];
    }

    /** Constructor. */
    public SimpleCharStream(java.io.Reader dstream, int startline,
                            int startcolumn) {
        this(dstream, startline, startcolumn, DEFAULT_BUFFER_SIZE);
    }

    /** Constructor. */
    public SimpleCharStream(java.io.Reader dstream) {
        this(dstream, 1, 1, DEFAULT_BUFFER_SIZE);
    }

    /** Reinitialise. The buffers are reallocated only when the requested size differs. */
    public void ReInit(java.io.Reader dstream, int startline,
                       int startcolumn, int buffersize) {
        inputStream = dstream;
        line = startline;
        column = startcolumn - 1;

        if (buffer == null || buffersize != buffer.length) {
            available = bufsize = buffersize;
            buffer = new char[buffersize];
            bufline = new int[buffersize];
            bufcolumn = new int[buffersize];
        }
        prevCharIsLF = prevCharIsCR = false;
        tokenBegin = inBuf = maxNextCharInd = 0;
        bufpos = -1;
    }

    /** Reinitialise. */
    public void ReInit(java.io.Reader dstream, int startline,
                       int startcolumn) {
        ReInit(dstream, startline, startcolumn, DEFAULT_BUFFER_SIZE);
    }

    /** Reinitialise. */
    public void ReInit(java.io.Reader dstream) {
        ReInit(dstream, 1, 1, DEFAULT_BUFFER_SIZE);
    }

    /** Constructor. A {@code null} encoding selects the platform default charset. */
    public SimpleCharStream(java.io.InputStream dstream, String encoding, int startline,
                            int startcolumn, int buffersize) throws java.io.UnsupportedEncodingException {
        this(encoding == null ? new java.io.InputStreamReader(dstream) : new java.io.InputStreamReader(dstream, encoding), startline, startcolumn, buffersize);
    }

    /** Constructor. Decodes the stream with the platform default charset. */
    public SimpleCharStream(java.io.InputStream dstream, int startline,
                            int startcolumn, int buffersize) {
        this(new java.io.InputStreamReader(dstream), startline, startcolumn, buffersize);
    }

    /** Constructor. */
    public SimpleCharStream(java.io.InputStream dstream, String encoding, int startline,
                            int startcolumn) throws java.io.UnsupportedEncodingException {
        this(dstream, encoding, startline, startcolumn, DEFAULT_BUFFER_SIZE);
    }

    /** Constructor. */
    public SimpleCharStream(java.io.InputStream dstream, int startline,
                            int startcolumn) {
        this(dstream, startline, startcolumn, DEFAULT_BUFFER_SIZE);
    }

    /** Constructor. */
    public SimpleCharStream(java.io.InputStream dstream, String encoding) throws java.io.UnsupportedEncodingException {
        this(dstream, encoding, 1, 1, DEFAULT_BUFFER_SIZE);
    }

    /** Constructor. */
    public SimpleCharStream(java.io.InputStream dstream) {
        this(dstream, 1, 1, DEFAULT_BUFFER_SIZE);
    }

    /** Reinitialise. A {@code null} encoding selects the platform default charset. */
    public void ReInit(java.io.InputStream dstream, String encoding, int startline,
                       int startcolumn, int buffersize) throws java.io.UnsupportedEncodingException {
        ReInit(encoding == null ? new java.io.InputStreamReader(dstream) : new java.io.InputStreamReader(dstream, encoding), startline, startcolumn, buffersize);
    }

    /** Reinitialise. Decodes the stream with the platform default charset. */
    public void ReInit(java.io.InputStream dstream, int startline,
                       int startcolumn, int buffersize) {
        ReInit(new java.io.InputStreamReader(dstream), startline, startcolumn, buffersize);
    }

    /** Reinitialise. */
    public void ReInit(java.io.InputStream dstream, String encoding) throws java.io.UnsupportedEncodingException {
        ReInit(dstream, encoding, 1, 1, DEFAULT_BUFFER_SIZE);
    }

    /** Reinitialise. */
    public void ReInit(java.io.InputStream dstream) {
        ReInit(dstream, 1, 1, DEFAULT_BUFFER_SIZE);
    }

    /** Reinitialise. */
    public void ReInit(java.io.InputStream dstream, String encoding, int startline,
                       int startcolumn) throws java.io.UnsupportedEncodingException {
        ReInit(dstream, encoding, startline, startcolumn, DEFAULT_BUFFER_SIZE);
    }

    /** Reinitialise. */
    public void ReInit(java.io.InputStream dstream, int startline,
                       int startcolumn) {
        ReInit(dstream, startline, startcolumn, DEFAULT_BUFFER_SIZE);
    }

    /** Get token literal value: the characters from the token start up to the current position. */
    public String GetImage() {
        if (bufpos >= tokenBegin) {
            return new String(buffer, tokenBegin, bufpos - tokenBegin + 1);
        } else {
            // token wraps around the end of the circular buffer
            return new String(buffer, tokenBegin, bufsize - tokenBegin) +
                   new String(buffer, 0, bufpos + 1);
        }
    }

    /** Get the suffix: the last {@code len} characters up to the current position. */
    public char[] GetSuffix(int len) {
        char[] ret = new char[len];

        if ((bufpos + 1) >= len) {
            System.arraycopy(buffer, bufpos - len + 1, ret, 0, len);
        } else {
            // suffix wraps around the end of the circular buffer
            System.arraycopy(buffer, bufsize - (len - bufpos - 1), ret, 0,
                             len - bufpos - 1);
            System.arraycopy(buffer, 0, ret, len - bufpos - 1, bufpos + 1);
        }

        return ret;
    }

    /** Reset buffer when finished. The stream must not be read afterwards without ReInit. */
    public void Done() {
        buffer = null;
        bufline = null;
        bufcolumn = null;
    }

    /**
     * Method to adjust line and column numbers for the start of a token.
     *
     * @param newLine line number to assign to the first character of the token
     * @param newCol  column number to assign to the first character of the token
     */
    public void adjustBeginLineColumn(int newLine, int newCol) {
        int start = tokenBegin;
        int len;

        if (bufpos >= tokenBegin) {
            len = bufpos - tokenBegin + inBuf + 1;
        } else {
            len = bufsize - tokenBegin + bufpos + 1 + inBuf;
        }

        int i = 0, j = 0, k = 0;
        int nextColDiff = 0, columnDiff = 0;

        // First pass: characters on the token's first line keep their relative columns.
        while (i < len && bufline[j = start % bufsize] == bufline[k = ++start % bufsize]) {
            bufline[j] = newLine;
            nextColDiff = columnDiff + bufcolumn[k] - bufcolumn[j];
            bufcolumn[j] = newCol + columnDiff;
            columnDiff = nextColDiff;
            i++;
        }

        if (i < len) {
            bufline[j] = newLine++;
            bufcolumn[j] = newCol + columnDiff;

            // Second pass: remaining characters only get their line numbers renumbered.
            while (i++ < len) {
                if (bufline[j = start % bufsize] != bufline[++start % bufsize]) {
                    bufline[j] = newLine++;
                } else {
                    bufline[j] = newLine;
                }
            }
        }

        line = bufline[j];
        column = bufcolumn[j];
    }
}
/* JavaCC - OriginalChecksum=ce31feeb88a5437b2236b59d9470870c (do not edit this line) */

View File

@ -0,0 +1,96 @@
/* Generated By:JJTree: Do not edit this line. SimpleNode.java Version 4.3 */
/* JavaCCOptions:MULTI=true,NODE_USES_PARSER=false,VISITOR=true,TRACK_TOKENS=false,NODE_PREFIX=AST,NODE_EXTENDS=,NODE_FACTORY=,SUPPORT_CLASS_VISIBILITY_PUBLIC=true */
package eu.dnetlib.data.collective.transformation.rulelanguage.parser;
/**
 * Default {@link Node} implementation: stores the parent, a growable child
 * array, the node id, an optional value and the parser that created the node.
 */
public class SimpleNode implements Node {

    protected Node parent;
    protected Node[] children;
    protected int id;
    protected Object value;
    protected FtScript parser;

    /**
     * Creates a node with the given node id.
     *
     * @param i node id; used as index into {@code FtScriptTreeConstants.jjtNodeName}
     */
    public SimpleNode(int i) {
        id = i;
    }

    /**
     * Creates a node with the given node id that remembers its parser.
     *
     * @param p the parser to store
     * @param i node id
     */
    public SimpleNode(FtScript p, int i) {
        this(i);
        parser = p;
    }

    /** Does nothing in this class. */
    public void jjtOpen() {
    }

    /** Does nothing in this class. */
    public void jjtClose() {
    }

    public void jjtSetParent(Node n) {
        parent = n;
    }

    public Node jjtGetParent() {
        return parent;
    }

    /**
     * Stores {@code n} as child number {@code i}, creating or enlarging the
     * child array when the index does not fit yet.
     */
    public void jjtAddChild(Node n, int i) {
        if (children == null) {
            children = new Node[i + 1];
        } else if (i >= children.length) {
            Node[] enlarged = new Node[i + 1];
            System.arraycopy(children, 0, enlarged, 0, children.length);
            children = enlarged;
        }
        children[i] = n;
    }

    /** Returns child number {@code i}; fails if no child array exists yet. */
    public Node jjtGetChild(int i) {
        return children[i];
    }

    /** Returns the length of the child array, or 0 when there is none. */
    public int jjtGetNumChildren() {
        if (children == null) {
            return 0;
        }
        return children.length;
    }

    public void jjtSetValue(Object value) {
        this.value = value;
    }

    public Object jjtGetValue() {
        return value;
    }

    /** Accept the visitor. */
    public Object jjtAccept(FtScriptVisitor visitor, Object data) {
        return visitor.visit(this, data);
    }

    /**
     * Lets every child accept the visitor, in child order.
     *
     * @return the {@code data} argument, unchanged
     */
    public Object childrenAccept(FtScriptVisitor visitor, Object data) {
        if (children != null) {
            for (Node child : children) {
                child.jjtAccept(visitor, data);
            }
        }
        return data;
    }

    /*
     * You can override these two methods in subclasses of SimpleNode to
     * customize the way the node appears when the tree is dumped. If your
     * output uses more than one line you should override toString(String),
     * otherwise overriding toString() is probably all you need to do.
     */

    /** Returns the node type name looked up by node id. */
    public String toString() {
        return FtScriptTreeConstants.jjtNodeName[id];
    }

    /** Returns {@link #toString()} preceded by {@code prefix}. */
    public String toString(String prefix) {
        return prefix + toString();
    }

    /**
     * Prints this node and, recursively, its non-null children to standard
     * output. Override this method if you want to customize how the node
     * dumps out its children.
     */
    public void dump(String prefix) {
        System.out.println(toString(prefix));
        if (children == null) {
            return;
        }
        for (Node child : children) {
            SimpleNode n = (SimpleNode) child;
            if (n != null) {
                n.dump(prefix + " ");
            }
        }
    }
}
/* JavaCC - OriginalChecksum=67cc365cba43ea3c43c2635579e8f356 (do not edit this line) */

View File

@ -0,0 +1,131 @@
/* Generated By:JavaCC: Do not edit this line. Token.java Version 5.0 */
/* JavaCCOptions:TOKEN_EXTENDS=,KEEP_LINE_COL=null,SUPPORT_CLASS_VISIBILITY_PUBLIC=true */
package eu.dnetlib.data.collective.transformation.rulelanguage.parser;
/**
* Describes the input token stream.
*/
public class Token implements java.io.Serializable {
/**
* The version identifier for this Serializable class.
* Increment only if the <i>serialized</i> form of the
* class changes.
*/
private static final long serialVersionUID = 1L;
/**
* An integer that describes the kind of this token. This numbering
* system is determined by JavaCCParser, and a table of these numbers is
* stored in the file ...Constants.java.
*/
public int kind;
/** The line number of the first character of this Token. */
public int beginLine;
/** The column number of the first character of this Token. */
public int beginColumn;
/** The line number of the last character of this Token. */
public int endLine;
/** The column number of the last character of this Token. */
public int endColumn;
/**
* The string image of the token.
*/
public String image;
/**
* A reference to the next regular (non-special) token from the input
* stream. If this is the last token from the input stream, or if the
* token manager has not read tokens beyond this one, this field is
* set to null. This is true only if this token is also a regular
* token. Otherwise, see below for a description of the contents of
* this field.
*/
public Token next;
/**
* This field is used to access special tokens that occur prior to this
* token, but after the immediately preceding regular (non-special) token.
* If there are no such special tokens, this field is set to null.
* When there are more than one such special token, this field refers
* to the last of these special tokens, which in turn refers to the next
* previous special token through its specialToken field, and so on
* until the first special token (whose specialToken field is null).
* The next fields of special tokens refer to other special tokens that
* immediately follow it (without an intervening regular token). If there
* is no such token, this field is null.
*/
public Token specialToken;
/**
* An optional attribute value of the Token.
* Tokens which are not used as syntactic sugar will often contain
* meaningful values that will be used later on by the compiler or
* interpreter. This attribute value is often different from the image.
* Any subclass of Token that actually wants to return a non-null value can
* override this method as appropriate.
*/
public Object getValue() {
return null;
}
/**
* No-argument constructor
*/
public Token() {}
/**
* Constructs a new token for the specified Image.
*/
public Token(int kind)
{
this(kind, null);
}
/**
* Constructs a new token for the specified Image and Kind.
*/
public Token(int kind, String image)
{
this.kind = kind;
this.image = image;
}
/**
* Returns the image.
*/
public String toString()
{
return image;
}
/**
* Returns a new Token object, by default. However, if you want, you
* can create and return subclass objects based on the value of ofKind.
* Simply add the cases to the switch for all those special cases.
* For example, if you have a subclass of Token called IDToken that
* you want to create if ofKind is ID, simply add something like :
*
* case MyParserConstants.ID : return new IDToken(ofKind, image);
*
* to the following switch statement. Then you can cast the matchedToken
* variable to the appropriate type and use it in your lexical actions.
*
* @param ofKind the kind of the token to create
* @param image the image of the token to create
* @return a new Token carrying the given kind and image
*/
public static Token newToken(int ofKind, String image)
{
// The switch has only a default branch; it is kept as the extension point described above.
switch(ofKind)
{
default : return new Token(ofKind, image);
}
}
/**
* Convenience overload of newToken(int, String) that passes a null image.
*
* @param ofKind the kind of the token to create
* @return the token produced by newToken(ofKind, null)
*/
public static Token newToken(int ofKind)
{
return newToken(ofKind, null);
}
}
/* JavaCC - OriginalChecksum=d30698094e3526551e198c33b8e7086d (do not edit this line) */

View File

@ -0,0 +1,147 @@
/* Generated By:JavaCC: Do not edit this line. TokenMgrError.java Version 5.0 */
/* JavaCCOptions: */
package eu.dnetlib.data.collective.transformation.rulelanguage.parser;
/** Token Manager Error. */
public class TokenMgrError extends Error
{
  /**
   * Serialization version of this class; bump it only when the
   * <i>serialized</i> form changes.
   */
  private static final long serialVersionUID = 1L;

  /*
   * Reason codes stored in errorCode.
   */

  /** A lexical error occurred. */
  static final int LEXICAL_ERROR = 0;

  /** A second instance of a static token manager was requested. */
  static final int STATIC_LEXER_ERROR = 1;

  /** A switch to an invalid lexical state was attempted. */
  static final int INVALID_LEXICAL_STATE = 2;

  /** The token manager detected (and left) an infinite loop. */
  static final int LOOP_DETECTED = 3;

  /** Reason why this error was thrown; one of the four constants above. */
  int errorCode;

  /**
   * Builds a printable copy of the given string. NUL characters are dropped,
   * the usual control characters, quotes and the backslash are written as
   * backslash escapes, and every other character outside 0x20..0x7e is
   * written as a four-digit unicode escape.
   *
   * @param str the text to escape; must not be null
   * @return the escaped text
   */
  protected static final String addEscapes(String str) {
    StringBuilder escaped = new StringBuilder();
    for (char c : str.toCharArray()) {
      switch (c) {
        case 0:
          // NUL is silently omitted from the result
          break;
        case '\b':
          escaped.append("\\b");
          break;
        case '\t':
          escaped.append("\\t");
          break;
        case '\n':
          escaped.append("\\n");
          break;
        case '\f':
          escaped.append("\\f");
          break;
        case '\r':
          escaped.append("\\r");
          break;
        case '\"':
          escaped.append("\\\"");
          break;
        case '\'':
          escaped.append("\\\'");
          break;
        case '\\':
          escaped.append("\\\\");
          break;
        default:
          if (c < 0x20 || c > 0x7e) {
            // left-pad the hex code to four digits
            String hex = "0000" + Integer.toString(c, 16);
            escaped.append("\\u").append(hex.substring(hex.length() - 4));
          } else {
            escaped.append(c);
          }
          break;
      }
    }
    return escaped.toString();
  }

  /**
   * Composes the detail message for a lexical error.
   *
   * @param EOFSeen     true if end of input caused the error
   * @param lexState    lexical state in which the error occurred (not used in the message)
   * @param errorLine   line number of the error
   * @param errorColumn column number of the error
   * @param errorAfter  text that was read before the error occurred
   * @param curChar     the offending character (ignored when EOFSeen is true)
   * @return the formatted message
   */
  protected static String LexicalError(boolean EOFSeen, int lexState, int errorLine, int errorColumn, String errorAfter, char curChar) {
    String encountered;
    if (EOFSeen) {
      encountered = "<EOF> ";
    } else {
      encountered = "\"" + addEscapes(String.valueOf(curChar)) + "\"" + " (" + (int) curChar + "), ";
    }
    String position = "Lexical error at line " + errorLine + ", column " + errorColumn;
    return position + ". Encountered: " + encountered + "after : \"" + addEscapes(errorAfter) + "\"";
  }

  /**
   * Returns the message of this error. The body simply defers to the
   * superclass; it exists as a hook for customised messages (for example a
   * generic "internal error" text for LOOP_DETECTED or INVALID_LEXICAL_STATE).
   */
  @Override
  public String getMessage() {
    return super.getMessage();
  }

  /** Creates an error without message; errorCode keeps its default value. */
  public TokenMgrError() {
  }

  /**
   * Creates an error with a message and a reason code.
   *
   * @param message the detail message
   * @param reason  the value stored in errorCode
   */
  public TokenMgrError(String message, int reason) {
    super(message);
    this.errorCode = reason;
  }

  /**
   * Creates an error whose message is produced by LexicalError from the
   * given lexer state.
   */
  public TokenMgrError(boolean EOFSeen, int lexState, int errorLine, int errorColumn, String errorAfter, char curChar, int reason) {
    this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason);
  }
}
/* JavaCC - OriginalChecksum=b7750665ed9570de389d9ad956321403 (do not edit this line) */

View File

@ -0,0 +1,909 @@
/*@bgen(jjtree) Generated By:JJTree: Do not edit this line. ft.jj */
/*@egen*//**
* JJTree file
* NODE_PACKAGE = "eu.dnetlib.data.collective.transformation.rulelanguage.node";
*/
options {
STATIC=false;
JDK_VERSION = "1.6";
}
PARSER_BEGIN(FtScript)
package eu.dnetlib.data.collective.transformation.rulelanguage.parser;
import eu.dnetlib.data.collective.transformation.rulelanguage.Rules;
public class FtScript/*@bgen(jjtree)*/implements FtScriptTreeConstants/*@egen*/ {/*@bgen(jjtree)*/
protected JJTFtScriptState jjtree = new JJTFtScriptState();
/*@egen*/
}
PARSER_END(FtScript)
SKIP :
{
" "
| "\t"
| "\n"
| "\r"
| <"//" (~["\n","\r"])* ("\n"|"\r"|"\r\n")>
| <"/*" (~["*"])* "*" (~["/"] (~["*"])* "*")* "/">
}
MORE : {
"//" : IN_SINGLE_LINE_COMMENT }
< IN_SINGLE_LINE_COMMENT >
SPECIAL_TOKEN :
{
<SINGLE_LINE_COMMENT: "\n" | "\r" | "\r\n" > : DEFAULT
}
/* RESERVED WORDS AND LITERALS */
// KEYWORDS
TOKEN : { < IMPORT: "import" > }
TOKEN : { < TRANS: "trans" > }
TOKEN : { < IF: "if" > }
TOKEN : { < ELSE: "else" > }
TOKEN : { < END: "end" > }
TOKEN : { < EXTRACT: "Extract" > }
TOKEN : { < IDENTIFIEREXTRACT: "identifierExtract" > }
TOKEN : { < CONVERT: "Convert" > }
TOKEN : { < REGEXPR: "RegExpr" > }
TOKEN : { < GETVALUE: "getValue" > }
TOKEN : { < COPY: "copy" > }
TOKEN : { < DECLARE_NAMESPACE: "declare_ns" > }
TOKEN : { < DECLARE_SCRIPT: "declare_script" > }
TOKEN : { < DECLARE_SUBSCRIPT: "declare_subscript" > }
TOKEN : { < PREPROCESS: "preprocess" > }
TOKEN : { < SET: "set" > }
TOKEN : { < SKIPRECORD: "skipRecord" > }
TOKEN : { < SPLIT: "split" > }
TOKEN : { < STATIC: "static" > }
TOKEN : { < XPATH_SCHEME: "xpath:" > }
TOKEN : { < APPLY: "apply" > }
TOKEN : { < EMPTY: "empty" > }
TOKEN : { < DBLOOKUP: "dblookup" > }
TOKEN : { < BLACKLIST: "blacklist" > }
TOKEN : { < LOOKUP: "lookup" > }
TOKEN : { < CONCAT: "concat" > }
// SPEC. CHARS
TOKEN : { < SEMICOLON: ";" > }
TOKEN : { < RBRACKET: "]" > }
TOKEN : { < LBRACE: "{" > }
TOKEN : { < RBRACE: "}" > }
TOKEN : { < COMMA: "," > }
TOKEN : { < RPAREN: ")" > }
TOKEN : { < QUOTE: "'" > }
TOKEN :
{
< EQUAL: "=" > |
< NOTEQUAL: "!=" > |
< GT: " &gt; " | ">" > |
< LT: " &lt; " | "<" > |
< GTE: " &gt;= " | ">=" > |
< LTE: " &lt;= " | "<=" > |
< PLUS: "+" > |
< VBAR: "|" > |
// < SLASH_SLASH: "//" > |
< SLASH: "/" > |
< DOT_DOT: ".." > |
//< DOT: "." > |
< AT: "@" > |
< LPAREN: "(" > |
< LBRACKET: "[" > |
< DOLLAR_QNAME: "$" <IDENTIFIER> > |
< PERCENT_QNAME: "%" < IDENTIFIER > >
}
TOKEN :
{
< XPATH:
< XPATH_SCHEME > <QUOTED_STRING >
>
}
TOKEN :
{
< JOBCONST:
< JOBCONST_PREFIX > <LETTER_OR_DIGIT> (<LETTER_OR_DIGIT>)* >
| < #JOBCONST_PREFIX: "$job." > }
TOKEN: {
< QUOTED_STRING: "\"" (~["\""] )+ "\"" > }
TOKEN:
{
< SINGLE_QUOTE: "'"
(~["'"]
)+ "'" > }
TOKEN : /* STRING LITERALS*/
{
< CHARACTER_LITERAL:
"'"
( (~["'","\\","\n","\r"])
| ("\\"
( ["n","t","b","r","f","\\","'","\""]
| ["0"-"7"] ( ["0"-"7"] )?
| ["0"-"3"] ["0"-"7"] ["0"-"7"]
)
)
)
"'"
>
|
< STRING_LITERAL:
"\""
( (~["\"", "\\", "\n", "\r"])
| ("\\"
( ["n", "t", "b", "r", "f", "\\", "'", "\""]
| ["0"-"7"] ( ["0"-"7"] )?
| ["0"-"3"] ["0"-"7"] ["0"-"7"]
)
)
)*
"\""
>
}
TOKEN : {
< URI:
< IDENTIFIER > "://" <IDENTIFIER > ( "/" < IDENTIFIER > )* ( "/" )?
> }
TOKEN : /* IDENTIFIERS */
{
< IDENTIFIER:
( < AT >|<LETTER_OR_DIGIT>) (<LETTER_OR_DIGIT>|<MINUS>|< DOT >|< COLON >|< AT >)* >
| < #LETTER_OR_DIGIT: ["_","a"-"z","A"-"Z","0"-"9"] >
| < #MINUS: "-" >
| < #DOT: "." >
| < #COLON: ":" >
//| < #AT: "@" >
}
ASTStart Start() :{/*@bgen(jjtree) Start */
ASTStart jjtn000 = new ASTStart(JJTSTART);
boolean jjtc000 = true;
jjtree.openNodeScope(jjtn000);
/*@egen*/}
{/*@bgen(jjtree) Start */
try {
/*@egen*/
(script())/*@bgen(jjtree)*/
{
jjtree.closeNodeScope(jjtn000, true);
jjtc000 = false;
}
/*@egen*/
{ return jjtn000; }/*@bgen(jjtree)*/
} catch (Throwable jjte000) {
if (jjtc000) {
jjtree.clearNodeScope(jjtn000);
jjtc000 = false;
} else {
jjtree.popNode();
}
if (jjte000 instanceof RuntimeException) {
throw (RuntimeException)jjte000;
}
if (jjte000 instanceof ParseException) {
throw (ParseException)jjte000;
}
throw (Error)jjte000;
} finally {
if (jjtc000) {
jjtree.closeNodeScope(jjtn000, true);
}
}
/*@egen*/
}
/**
* id = value
*/
void assign(Rules r) : {/*@bgen(jjtree) MyAssign */
ASTMyAssign jjtn000 = new ASTMyAssign(JJTMYASSIGN);
boolean jjtc000 = true;
jjtree.openNodeScope(jjtn000);
/*@egen*/String value = ""; Token t;}
{/*@bgen(jjtree) MyAssign */
try {
/*@egen*/
{jjtn000.setRule(r);}
(t = inputField()/*@bgen(jjtree)*/
{
jjtree.closeNodeScope(jjtn000, true);
jjtc000 = false;
}
/*@egen*/ {jjtn000.setFieldExpression(t.image);}
|value = quotedString()/*@bgen(jjtree)*/
{
jjtree.closeNodeScope(jjtn000, true);
jjtc000 = false;
}
/*@egen*/ {jjtn000.setAttribute(value);}
)/*@bgen(jjtree)*/
} catch (Throwable jjte000) {
if (jjtc000) {
jjtree.clearNodeScope(jjtn000);
jjtc000 = false;
} else {
jjtree.popNode();
}
if (jjte000 instanceof RuntimeException) {
throw (RuntimeException)jjte000;
}
if (jjte000 instanceof ParseException) {
throw (ParseException)jjte000;
}
throw (Error)jjte000;
} finally {
if (jjtc000) {
jjtree.closeNodeScope(jjtn000, true);
}
}
/*@egen*/
}
void attrib_list() : {}
{
<LBRACKET>
(attribute())*
<RBRACKET>
}
void attribute() : {/*@bgen(jjtree) MyAttribute */
ASTMyAttribute jjtn000 = new ASTMyAttribute(JJTMYATTRIBUTE);
boolean jjtc000 = true;
jjtree.openNodeScope(jjtn000);
/*@egen*/String value; Token t=null;}
{/*@bgen(jjtree) MyAttribute */
try {
/*@egen*/
(value = identifier() { jjtn000.setAttributeValue(value);}
|t = inputField()
{ value = t.image;
jjtn000.setAttributeInputField(value);
}
)
[<COMMA>]/*@bgen(jjtree)*/
} catch (Throwable jjte000) {
if (jjtc000) {
jjtree.clearNodeScope(jjtn000);
jjtc000 = false;
} else {
jjtree.popNode();
}
if (jjte000 instanceof RuntimeException) {
throw (RuntimeException)jjte000;
}
if (jjte000 instanceof ParseException) {
throw (ParseException)jjte000;
}
throw (Error)jjte000;
} finally {
if (jjtc000) {
jjtree.closeNodeScope(jjtn000, true);
}
}
/*@egen*/
}
/**
* a conditional Rule, which contains the condition, the Rule on which the condition holds and the alternative rule
*/
void conditionalStmt() : {/*@bgen(jjtree) MyCondition */
ASTMyCondition jjtn000 = new ASTMyCondition(JJTMYCONDITION);
boolean jjtc000 = true;
jjtree.openNodeScope(jjtn000);
/*@egen*/Rules r1 = new Rules(); Rules r2 = new Rules(); Token cond; Token apply;}
{/*@bgen(jjtree) MyCondition */
try {
/*@egen*/
[< APPLY > apply = inputField() {
jjtn000.setApplyExpression(apply.image); } ]
< IF > cond = inputField() { jjtn000.setConditionalExpression(cond.image); jjtn000.setPrimaryRule(r1); jjtn000.setSecondaryRule(r2); }
rule(r1)
< ELSE >
rule(r2)/*@bgen(jjtree)*/
} catch (Throwable jjte000) {
if (jjtc000) {
jjtree.clearNodeScope(jjtn000);
jjtc000 = false;
} else {
jjtree.popNode();
}
if (jjte000 instanceof RuntimeException) {
throw (RuntimeException)jjte000;
}
if (jjte000 instanceof ParseException) {
throw (ParseException)jjte000;
}
throw (Error)jjte000;
} finally {
if (jjtc000) {
jjtree.closeNodeScope(jjtn000, true);
}
}
/*@egen*/
}
void empty(Rules r) : {/*@bgen(jjtree) MyEmpty */
ASTMyEmpty jjtn000 = new ASTMyEmpty(JJTMYEMPTY);
boolean jjtc000 = true;
jjtree.openNodeScope(jjtn000);
/*@egen*/}
{/*@bgen(jjtree) MyEmpty */
try {
/*@egen*/
{jjtn000.setRule(r);}
< EMPTY >/*@bgen(jjtree)*/
{
jjtree.closeNodeScope(jjtn000, true);
jjtc000 = false;
}
/*@egen*/ {jjtn000.setEmpty(true);}/*@bgen(jjtree)*/
} finally {
if (jjtc000) {
jjtree.closeNodeScope(jjtn000, true);
}
}
/*@egen*/
}
String identifier() : {Token t;}
{
t = <IDENTIFIER> {return t.image;}
}
void importDeclaration() :
{/*@bgen(jjtree) MyImport */
ASTMyImport jjtn000 = new ASTMyImport(JJTMYIMPORT);
boolean jjtc000 = true;
jjtree.openNodeScope(jjtn000);
/*@egen*/String scriptName;}
{/*@bgen(jjtree) MyImport */
try {
/*@egen*/
<IMPORT> <LPAREN> scriptName = identifier() <RPAREN> ";"/*@bgen(jjtree)*/
{
jjtree.closeNodeScope(jjtn000, true);
jjtc000 = false;
}
/*@egen*/ { jjtn000.setScriptName(scriptName); }/*@bgen(jjtree)*/
} catch (Throwable jjte000) {
if (jjtc000) {
jjtree.clearNodeScope(jjtn000);
jjtc000 = false;
} else {
jjtree.popNode();
}
if (jjte000 instanceof RuntimeException) {
throw (RuntimeException)jjte000;
}
if (jjte000 instanceof ParseException) {
throw (ParseException)jjte000;
}
throw (Error)jjte000;
} finally {
if (jjtc000) {
jjtree.closeNodeScope(jjtn000, true);
}
}
/*@egen*/
}
Token anyExpression() : { Token t; }
{
t = < IDENTIFIER > {
return t; } }
Token inputField() :{ Token t; }
{
t = < XPATH >
{
return t;
}
| t = < JOBCONST > {
return t; }
| t = < DOLLAR_QNAME >
{
return t;
}
}
/*
* becomes obsolete
*/
String key() : {String key;}
{
key = identifier()
<EQUAL>
{return key;}
}
void literal() :
{}
{
< CHARACTER_LITERAL > | < STRING_LITERAL >
}
void nsDeclaration() :
{/*@bgen(jjtree) MyNs */
ASTMyNs jjtn000 = new ASTMyNs(JJTMYNS);
boolean jjtc000 = true;
jjtree.openNodeScope(jjtn000);
/*@egen*/String nsPrefix; String nsUri;}
{/*@bgen(jjtree) MyNs */
try {
/*@egen*/
< DECLARE_NAMESPACE > nsPrefix = identifier() < EQUAL > nsUri = quotedString() < SEMICOLON >/*@bgen(jjtree)*/
{
jjtree.closeNodeScope(jjtn000, true);
jjtc000 = false;
}
/*@egen*/
{
jjtn000.setNsDeclaration(nsPrefix, nsUri);
}/*@bgen(jjtree)*/
} catch (Throwable jjte000) {
if (jjtc000) {
jjtree.clearNodeScope(jjtn000);
jjtc000 = false;
} else {
jjtree.popNode();
}
if (jjte000 instanceof RuntimeException) {
throw (RuntimeException)jjte000;
}
if (jjte000 instanceof ParseException) {
throw (ParseException)jjte000;
}
throw (Error)jjte000;
} finally {
if (jjtc000) {
jjtree.closeNodeScope(jjtn000, true);
}
}
/*@egen*/
}
void preprocess() :{/*@bgen(jjtree) MyPreprocess */
ASTMyPreprocess jjtn000 = new ASTMyPreprocess(JJTMYPREPROCESS);
boolean jjtc000 = true;
jjtree.openNodeScope(jjtn000);
/*@egen*/String preprocessId;} {/*@bgen(jjtree) MyPreprocess */
try {
/*@egen*/
( < PREPROCESS >
preprocessId = identifier() < EQUAL >
< DBLOOKUP >
< LPAREN > { String sqlExpr; }
sqlExpr = quotedString()
< RPAREN > { jjtn000.preprocess(preprocessId, "dblookup", sqlExpr); }
< SEMICOLON >
) |
(
< BLACKLIST >
< LPAREN > { String blacklistDataSourceId; }
blacklistDataSourceId = quotedString()
< RPAREN > { jjtn000.preprocess("blacklist", blacklistDataSourceId); }
< SEMICOLON > )/*@bgen(jjtree)*/
} catch (Throwable jjte000) {
if (jjtc000) {
jjtree.clearNodeScope(jjtn000);
jjtc000 = false;
} else {
jjtree.popNode();
}
if (jjte000 instanceof RuntimeException) {
throw (RuntimeException)jjte000;
}
if (jjte000 instanceof ParseException) {
throw (ParseException)jjte000;
}
throw (Error)jjte000;
} finally {
if (jjtc000) {
jjtree.closeNodeScope(jjtn000, true);
}
}
/*@egen*/ }
void set(Rules r) : {/*@bgen(jjtree) MySet */
ASTMySet jjtn000 = new ASTMySet(JJTMYSET);
boolean jjtc000 = true;
jjtree.openNodeScope(jjtn000);
/*@egen*/Token expr; String value = "";}
{/*@bgen(jjtree) MySet */
try {
/*@egen*/
{ jjtn000.setRule(r); }
< SET >
< LPAREN >
(
expr = inputField() { jjtn000.setValueExpression(expr.image); }
| value = quotedString() { jjtn000.setAttribute(value); }
)
(
< COMMA >
{
r = new Rules();
}
rule(r)
{
jjtn000.addAttributeRule(r);
}
)*
< RPAREN >/*@bgen(jjtree)*/
} catch (Throwable jjte000) {
if (jjtc000) {
jjtree.clearNodeScope(jjtn000);
jjtc000 = false;
} else {
jjtree.popNode();
}
if (jjte000 instanceof RuntimeException) {
throw (RuntimeException)jjte000;
}
if (jjte000 instanceof ParseException) {
throw (ParseException)jjte000;
}
throw (Error)jjte000;
} finally {
if (jjtc000) {
jjtree.closeNodeScope(jjtn000, true);
}
}
/*@egen*/
}
void skip(Rules r) :{/*@bgen(jjtree) MySkip */
ASTMySkip jjtn000 = new ASTMySkip(JJTMYSKIP);
boolean jjtc000 = true;
jjtree.openNodeScope(jjtn000);
/*@egen*/}
{/*@bgen(jjtree) MySkip */
try {
/*@egen*/
{ jjtn000.setRule(r); }
< SKIPRECORD > {jjtn000.skipRecord();}
< LPAREN >
< RPAREN >/*@bgen(jjtree)*/
} finally {
if (jjtc000) {
jjtree.closeNodeScope(jjtn000, true);
}
}
/*@egen*/ }
void copy(Rules r) : {/*@bgen(jjtree) MyCopy */
ASTMyCopy jjtn000 = new ASTMyCopy(JJTMYCOPY);
boolean jjtc000 = true;
jjtree.openNodeScope(jjtn000);
/*@egen*/}
{/*@bgen(jjtree) MyCopy */
try {
/*@egen*/ { jjtn000.setRule(r); }
< COPY >
< LPAREN > { String templateMatchExpression; String applyTemplateSelectExpression; String copySelectExpression; }
templateMatchExpression = quotedString()
< COMMA >
applyTemplateSelectExpression = quotedString()
< COMMA >
copySelectExpression = quotedString()
< RPAREN >/*@bgen(jjtree)*/
{
jjtree.closeNodeScope(jjtn000, true);
jjtc000 = false;
}
/*@egen*/ { jjtn000.copy(templateMatchExpression, applyTemplateSelectExpression, copySelectExpression); }/*@bgen(jjtree)*/
} catch (Throwable jjte000) {
if (jjtc000) {
jjtree.clearNodeScope(jjtn000);
jjtc000 = false;
} else {
jjtree.popNode();
}
if (jjte000 instanceof RuntimeException) {
throw (RuntimeException)jjte000;
}
if (jjte000 instanceof ParseException) {
throw (ParseException)jjte000;
}
throw (Error)jjte000;
} finally {
if (jjtc000) {
jjtree.closeNodeScope(jjtn000, true);
}
}
/*@egen*/
}
/*
 * Operation on the right-hand side of a rule. Builds an ASTMyOp node bound to
 * rule r and accepts exactly one of: getValue, Convert, Extract, RegExpr,
 * split, lookup, identifierExtract or concat. The parsed arguments are handed
 * to the method of the same name on the node.
 */
void op(Rules r) : {/*@bgen(jjtree) MyOp */
ASTMyOp jjtn000 = new ASTMyOp(JJTMYOP);
boolean jjtc000 = true;
jjtree.openNodeScope(jjtn000);
/*@egen*/String functionName = ""; Token expr; Token expr2; Token vocab; String defaultPattern = ""; String function = ""; String elementName = ""; String regExpr; Token feature;}
{/*@bgen(jjtree) MyOp */
try {
/*@egen*/
{jjtn000.setRule(r);}
( (<GETVALUE>
<LPAREN>
functionName = identifier() <COMMA> attrib_list()
<RPAREN>/*@bgen(jjtree)*/
{
jjtree.closeNodeScope(jjtn000, true);
jjtc000 = false;
}
/*@egen*/ {jjtn000.getValue(functionName);}
)
| (<CONVERT>
<LPAREN>
expr = inputField()
<COMMA>
vocab = <IDENTIFIER>
(
<COMMA>
defaultPattern = quotedString()
<COMMA>
function = quotedString() )*
<RPAREN>/*@bgen(jjtree)*/
{
jjtree.closeNodeScope(jjtn000, true);
jjtc000 = false;
}
/*@egen*/ {jjtn000.convert(expr.image, vocab.image, defaultPattern, function);}
)
| (< EXTRACT >
< LPAREN >
feature = < IDENTIFIER >
< RPAREN >/*@bgen(jjtree)*/
{
jjtree.closeNodeScope(jjtn000, true);
jjtc000 = false;
}
/*@egen*/ {jjtn000.extract(feature.image); } )
| (<REGEXPR>
<LPAREN>
expr = inputField()
<COMMA>
expr2 = inputField()
<COMMA>
regExpr = quotedString()
<RPAREN>/*@bgen(jjtree)*/
{
jjtree.closeNodeScope(jjtn000, true);
jjtc000 = false;
}
/*@egen*/ {jjtn000.regExpr(expr.image, expr2.image, regExpr);}
)
| (<SPLIT>
< LPAREN >
expr = inputField()
< COMMA >
elementName = quotedString()
< COMMA >
regExpr = quotedString()
< RPAREN >/*@bgen(jjtree)*/
{
jjtree.closeNodeScope(jjtn000, true);
jjtc000 = false;
}
/*@egen*/ {jjtn000.split(expr.image, elementName, regExpr);}
)
| ( < LOOKUP > { String propertyKey; }
< LPAREN >
expr = inputField()
< COMMA >
propertyKey = quotedString()
< RPAREN >/*@bgen(jjtree)*/
{
jjtree.closeNodeScope(jjtn000, true);
jjtc000 = false;
}
/*@egen*/ { jjtn000.lookup(expr.image, propertyKey); }
)
| ( < IDENTIFIEREXTRACT > { String xpathExprJsonString; Token xpathExprInputSource; }
< LPAREN >
// first argument: single-quoted JSON list of xpath expressions, e.g. '{//abc, //def }'
// (the assignment must stay outside the comment: the variable is read below)
xpathExprJsonString = singleQuotedString()
< COMMA >
// second argument: xpath expression of the input source
xpathExprInputSource = inputField()
< COMMA >
// third argument: regular expression
regExpr = singleQuotedString()
< RPAREN >/*@bgen(jjtree)*/
{
jjtree.closeNodeScope(jjtn000, true);
jjtc000 = false;
}
/*@egen*/ { jjtn000.identifierExtract(xpathExprJsonString, xpathExprInputSource.image, regExpr); }
)
| ( < CONCAT > { jjtn000.concat(); }
< LPAREN > { String v; Token t; }
( v = quotedString() { jjtn000.addConcat(v); } | t = < DOLLAR_QNAME > { jjtn000.addConcat(t.image); }
)
(
< COMMA >
( v = quotedString() { jjtn000.addConcat(v); } | t = < DOLLAR_QNAME > { jjtn000.addConcat(t.image); }
) )*
< RPAREN > )
)/*@bgen(jjtree)*/
} catch (Throwable jjte000) {
if (jjtc000) {
jjtree.clearNodeScope(jjtn000);
jjtc000 = false;
} else {
jjtree.popNode();
}
if (jjte000 instanceof RuntimeException) {
throw (RuntimeException)jjte000;
}
if (jjte000 instanceof ParseException) {
throw (ParseException)jjte000;
}
throw (Error)jjte000;
} finally {
if (jjtc000) {
jjtree.closeNodeScope(jjtn000, true);
}
}
/*@egen*/
}
String leftExprOutputField() :{ String leftExpr; }
{
leftExpr = identifier() < EQUAL > { return leftExpr; }
}
String leftExprVar() :{ String leftExpr; } {
leftExpr = variable() < EQUAL > { return leftExpr; } }
String leftExprTemplate() :{ String leftExpr; }
{
leftExpr = template() < EQUAL > { return leftExpr; } }
String singleQuotedString() : { Token t;} {
t =
< SINGLE_QUOTE >
{ return t.image; } }
String quotedString() : {Token t;}
{
t =
< QUOTED_STRING >
{return t.image;}
}
void rule(Rules r) : { String ruleDecl; String leftExpr;}
{
[ ruleDecl = ruleDecl() { r.setRuleDeclaration(ruleDecl); } ]
( leftExpr = leftExprOutputField()
{
String[] fieldArray = leftExpr.split("@");
r.setTargetField(fieldArray[0]);
if (fieldArray.length > 1) {
r.setAttribute(fieldArray[1]); }
}
| leftExpr = leftExprVar() { r.setVariable(leftExpr); }
| leftExpr = leftExprTemplate() { r.setTemplate(leftExpr); } )
(
LOOKAHEAD(2)
assign(r)
| op(r)
| set(r)
| copy(r)
| empty(r)
| skip(r)
) < SEMICOLON >
}
String ruleDecl() : {Token t;}
{
(
t = < STATIC >
)
{ return t.image;
}
}
void script() :{}
{
scriptDeclaration()
[ importDeclaration() ]
(
nsDeclaration()
)*
(
preprocess() )*
( stmt() | conditionalStmt() )*
( <END> <EOF> )
}
void scriptDeclaration() :
{/*@bgen(jjtree) MyScript */
ASTMyScript jjtn000 = new ASTMyScript(JJTMYSCRIPT);
boolean jjtc000 = true;
jjtree.openNodeScope(jjtn000);
/*@egen*/String scriptName; ASTMyScript.SCRIPTTYPE scriptType;}
{/*@bgen(jjtree) MyScript */
try {
/*@egen*/
( < DECLARE_SCRIPT > {
jjtn000.setScriptType(ASTMyScript.SCRIPTTYPE.MAINSCRIPT); }
| < DECLARE_SUBSCRIPT > {
jjtn000.setScriptType(ASTMyScript.SCRIPTTYPE.SUBSCRIPT); } ) scriptName = quotedString() < SEMICOLON >/*@bgen(jjtree)*/
{
jjtree.closeNodeScope(jjtn000, true);
jjtc000 = false;
}
/*@egen*/
{
jjtn000.setScript(scriptName);
}/*@bgen(jjtree)*/
} catch (Throwable jjte000) {
if (jjtc000) {
jjtree.clearNodeScope(jjtn000);
jjtc000 = false;
} else {
jjtree.popNode();
}
if (jjte000 instanceof RuntimeException) {
throw (RuntimeException)jjte000;
}
if (jjte000 instanceof ParseException) {
throw (ParseException)jjte000;
}
throw (Error)jjte000;
} finally {
if (jjtc000) {
jjtree.closeNodeScope(jjtn000, true);
}
}
/*@egen*/
}
void stmt() :{Rules r = new Rules();}
{
rule(r)
}
String string() : {Token t;}
{
t = <STRING_LITERAL> {return t.image.substring(1, t.image.length()-1);}
}
String variable() : { Token t; }
{
t = < DOLLAR_QNAME > { return t.image; }
}
String template() : { Token t; }
{
t = < PERCENT_QNAME > { return t.image; } }

View File

@ -0,0 +1,520 @@
/**
* JJTree file
* NODE_PACKAGE = "eu.dnetlib.data.collective.transformation.rulelanguage.node";
*/
// Generator options for JJTree/JavaCC.
options {
// generate a parser with instance (non-static) members
STATIC=false;
// JJTree: one AST node class per node type (the ASTMy... classes used below)
MULTI = true;
// JJTree: generate visitor support for the node classes
VISITOR = true;
JDK_VERSION = "1.6";
}
// Java compilation unit of the generated parser class FtScript. The class body
// is empty here; the generator adds the parsing methods for the productions below.
PARSER_BEGIN(FtScript)
package eu.dnetlib.data.collective.transformation.rulelanguage.parser;
import eu.dnetlib.data.collective.transformation.rulelanguage.Rules;
public class FtScript {
}
PARSER_END(FtScript)
// Input that is discarded by the lexer: blanks, tabs, line terminators,
// "//" comments that end with a line terminator, and "/* ... */" comments.
SKIP :
{
" "
| "\t"
| "\n"
| "\r"
| <"//" (~["\n","\r"])* ("\n"|"\r"|"\r\n")>
| <"/*" (~["*"])* "*" (~["/"] (~["*"])* "*")* "/">
}
// "//" switches to lexical state IN_SINGLE_LINE_COMMENT; a line terminator in
// that state yields the special token SINGLE_LINE_COMMENT and returns to DEFAULT.
// NOTE(review): no rule in IN_SINGLE_LINE_COMMENT consumes ordinary comment
// characters, and the SKIP section already matches "//" comments that end with a
// line terminator - confirm whether a MORE : { < ~[] > } rule is missing here.
MORE : {
"//" : IN_SINGLE_LINE_COMMENT }
< IN_SINGLE_LINE_COMMENT >
SPECIAL_TOKEN :
{
<SINGLE_LINE_COMMENT: "\n" | "\r" | "\r\n" > : DEFAULT
}
/* RESERVED WORDS AND LITERALS */
// KEYWORDS
// All keywords are declared before IDENTIFIER. The literals are matched exactly
// as spelled, so "Extract", "Convert" and "RegExpr" start with an upper-case
// letter while the remaining keywords are lower case or camel case.
TOKEN : { < IMPORT: "import" > }
TOKEN : { < TRANS: "trans" > }
TOKEN : { < IF: "if" > }
TOKEN : { < ELSE: "else" > }
TOKEN : { < END: "end" > }
TOKEN : { < EXTRACT: "Extract" > }
TOKEN : { < IDENTIFIEREXTRACT: "identifierExtract" > }
TOKEN : { < CONVERT: "Convert" > }
TOKEN : { < REGEXPR: "RegExpr" > }
TOKEN : { < GETVALUE: "getValue" > }
TOKEN : { < COPY: "copy" > }
TOKEN : { < DECLARE_NAMESPACE: "declare_ns" > }
TOKEN : { < DECLARE_SCRIPT: "declare_script" > }
TOKEN : { < DECLARE_SUBSCRIPT: "declare_subscript" > }
TOKEN : { < PREPROCESS: "preprocess" > }
TOKEN : { < SET: "set" > }
TOKEN : { < SKIPRECORD: "skipRecord" > }
TOKEN : { < SPLIT: "split" > }
TOKEN : { < STATIC: "static" > }
// prefix of the XPATH token defined further below
TOKEN : { < XPATH_SCHEME: "xpath:" > }
TOKEN : { < APPLY: "apply" > }
TOKEN : { < EMPTY: "empty" > }
TOKEN : { < DBLOOKUP: "dblookup" > }
TOKEN : { < BLACKLIST: "blacklist" > }
TOKEN : { < LOOKUP: "lookup" > }
TOKEN : { < CONCAT: "concat" > }
// SPEC. CHARS
// Punctuation tokens.
TOKEN : { < SEMICOLON: ";" > }
TOKEN : { < RBRACKET: "]" > }
TOKEN : { < LBRACE: "{" > }
TOKEN : { < RBRACE: "}" > }
TOKEN : { < COMMA: "," > }
TOKEN : { < RPAREN: ")" > }
TOKEN : { < QUOTE: "'" > }
// Operators and opening brackets, plus the two prefixed names:
// DOLLAR_QNAME is "$" followed by an IDENTIFIER, PERCENT_QNAME is "%" followed
// by an IDENTIFIER.
// NOTE(review): the entity-style spellings of the comparison operators include a
// leading and a trailing blank as written here - confirm that this is intended.
TOKEN :
{
< EQUAL: "=" > |
< NOTEQUAL: "!=" > |
< GT: " &gt; " | ">" > |
< LT: " &lt; " | "<" > |
< GTE: " &gt;= " | ">=" > |
< LTE: " &lt;= " | "<=" > |
< PLUS: "+" > |
< VBAR: "|" > |
// < SLASH_SLASH: "//" > |
< SLASH: "/" > |
< DOT_DOT: ".." > |
//< DOT: "." > |
< AT: "@" > |
< LPAREN: "(" > |
< LBRACKET: "[" > |
< DOLLAR_QNAME: "$" <IDENTIFIER> > |
< PERCENT_QNAME: "%" < IDENTIFIER > >
}
// XPATH: the "xpath:" prefix immediately followed by a double-quoted string,
// matched as one single token (no white space between prefix and string).
TOKEN :
{
< XPATH:
< XPATH_SCHEME > <QUOTED_STRING >
>
}
// JOBCONST: "$job." followed by one or more LETTER_OR_DIGIT characters.
// JOBCONST_PREFIX is a private (#) helper expression.
TOKEN :
{
< JOBCONST:
< JOBCONST_PREFIX > <LETTER_OR_DIGIT> (<LETTER_OR_DIGIT>)* >
| < #JOBCONST_PREFIX: "$job." > }
// QUOTED_STRING: a double quote, at least one character other than a double
// quote, and a closing double quote. There is no escape mechanism, and the
// token image includes both quotes.
TOKEN: {
< QUOTED_STRING: "\"" (~["\""] )+ "\"" > }
// SINGLE_QUOTE: same shape as QUOTED_STRING but delimited by apostrophes.
TOKEN:
{
< SINGLE_QUOTE: "'"
(~["'"]
)+ "'" > }
// Character and string literals with backslash escapes (\n, \t, \b, \r, \f,
// \\, \', \" and octal escapes); raw line terminators are not allowed inside.
// CHARACTER_LITERAL holds exactly one character or escape, STRING_LITERAL zero
// or more.
// NOTE(review): SINGLE_QUOTE and QUOTED_STRING are declared earlier and accept
// largely the same input - verify which token the lexer actually yields.
TOKEN : /* STRING LITERALS*/
{
< CHARACTER_LITERAL:
"'"
( (~["'","\\","\n","\r"])
| ("\\"
( ["n","t","b","r","f","\\","'","\""]
| ["0"-"7"] ( ["0"-"7"] )?
| ["0"-"3"] ["0"-"7"] ["0"-"7"]
)
)
)
"'"
>
|
< STRING_LITERAL:
"\""
( (~["\"", "\\", "\n", "\r"])
| ("\\"
( ["n", "t", "b", "r", "f", "\\", "'", "\""]
| ["0"-"7"] ( ["0"-"7"] )?
| ["0"-"3"] ["0"-"7"] ["0"-"7"]
)
)
)*
"\""
>
}
// URI: IDENTIFIER "://" IDENTIFIER, then any number of "/" IDENTIFIER segments
// and an optional trailing "/".
TOKEN : {
< URI:
< IDENTIFIER > "://" <IDENTIFIER > ( "/" < IDENTIFIER > )* ( "/" )?
> }
// IDENTIFIER: starts with "@" or a LETTER_OR_DIGIT (underscore, ASCII letter or
// digit) and continues with any of LETTER_OR_DIGIT, "-", ".", ":" or "@".
// The names marked with # are private helper expressions.
TOKEN : /* IDENTIFIERS */
{
< IDENTIFIER:
( < AT >|<LETTER_OR_DIGIT>) (<LETTER_OR_DIGIT>|<MINUS>|< DOT >|< COLON >|< AT >)* >
| < #LETTER_OR_DIGIT: ["_","a"-"z","A"-"Z","0"-"9"] >
| < #MINUS: "-" >
| < #DOT: "." >
| < #COLON: ":" >
//| < #AT: "@" >
}
// Entry production: parses one complete script and returns the root node
// (jjtThis, of type ASTStart).
ASTStart Start() :{}
{
(script())
{ return jjtThis; }
}
/**
* id = value
*
* Right-hand side of a plain assignment (node MyAssign): either an input
* field, whose token image is passed to setFieldExpression, or a quoted
* string, which is passed to setAttribute. The node is bound to rule r first.
*/
void assign(Rules r) #MyAssign : {String value = ""; Token t;}
{
{jjtThis.setRule(r);}
(t = inputField() {jjtThis.setFieldExpression(t.image);}
|value = quotedString() {jjtThis.setAttribute(value);}
)
}
// Bracketed list of zero or more attribute() entries: "[" attribute* "]".
// Creates no node of its own (#void).
void attrib_list() #void : {}
{
<LBRACKET>
(attribute())*
<RBRACKET>
}
// One entry of an attribute list (node MyAttribute): an identifier, stored via
// setAttributeValue, or an input field, whose token image is stored via
// setAttributeInputField. A comma after the entry is optional.
void attribute() #MyAttribute : {String value; Token t=null;}
{
(value = identifier() { jjtThis.setAttributeValue(value);}
|t = inputField()
{ value = t.image;
jjtThis.setAttributeInputField(value);
}
)
[<COMMA>]
}
/**
* a conditional Rule, which contains the condition, the Rule on which the condition holds and the alternative rule
*
* Syntax: [ "apply" inputField ] "if" inputField rule "else" rule
* The else branch is mandatory. The two Rules objects are registered on the
* node (setPrimaryRule / setSecondaryRule) before the rules are parsed into them.
*/
void conditionalStmt() #MyCondition : {Rules r1 = new Rules(); Rules r2 = new Rules(); Token cond; Token apply;}
{
[< APPLY > apply = inputField() {
jjtThis.setApplyExpression(apply.image); } ]
< IF > cond = inputField() { jjtThis.setConditionalExpression(cond.image); jjtThis.setPrimaryRule(r1); jjtThis.setSecondaryRule(r2); }
rule(r1)
< ELSE >
rule(r2)
}
// Right-hand side consisting of the keyword "empty" (node MyEmpty): binds the
// node to rule r and calls setEmpty(true).
void empty(Rules r) #MyEmpty : {}
{
{jjtThis.setRule(r);}
< EMPTY > {jjtThis.setEmpty(true);}
}
// Matches one IDENTIFIER token and returns its image.
String identifier() #void : {Token t;}
{
t = <IDENTIFIER> {return t.image;}
}
// Import of another script (node MyImport): "import" "(" identifier ")" ";".
// The identifier is passed to setScriptName.
void importDeclaration() #MyImport :
{String scriptName;}
{
<IMPORT> <LPAREN> scriptName = identifier() <RPAREN> ";" { jjtThis.setScriptName(scriptName); }
}
// Matches one IDENTIFIER token and returns the token itself.
// Not referenced by any other production in this grammar file.
Token anyExpression() #void : { Token t; }
{
t = < IDENTIFIER > {
return t; } }
// Reference to an input value: an XPATH token, a JOBCONST token or a
// DOLLAR_QNAME token. Returns the matched token unchanged; callers read its image.
Token inputField() #void :{ Token t; }
{
t = < XPATH >
{
return t;
}
| t = < JOBCONST > {
return t; }
| t = < DOLLAR_QNAME >
{
return t;
}
}
/*
* becomes obsolete
*
* Matches identifier "=" and returns the identifier. Not referenced by any
* other production in this grammar file.
*/
String key() #void : {String key;}
{
key = identifier()
<EQUAL>
{return key;}
}
// Matches a CHARACTER_LITERAL or a STRING_LITERAL and discards it.
// Not referenced by any other production in this grammar file.
void literal() #void :
{}
{
< CHARACTER_LITERAL > | < STRING_LITERAL >
}
// Namespace declaration (node MyNs): "declare_ns" prefix "=" quotedString ";".
// Prefix and quoted URI string are passed to setNsDeclaration.
void nsDeclaration() #MyNs :
{String nsPrefix; String nsUri;}
{
< DECLARE_NAMESPACE > nsPrefix = identifier() < EQUAL > nsUri = quotedString() < SEMICOLON >
{
jjtThis.setNsDeclaration(nsPrefix, nsUri);
}
}
// Preprocessing directive (node MyPreprocess). Two alternatives:
//   "preprocess" identifier "=" "dblookup" "(" quotedString ")" ";"
//   "blacklist" "(" quotedString ")" ";"
// As grouped here, the second alternative starts directly with "blacklist",
// without a leading "preprocess" keyword. The first form calls the
// three-argument preprocess(...) on the node, the second the two-argument one.
void preprocess() #MyPreprocess :{String preprocessId;} {
( < PREPROCESS >
preprocessId = identifier() < EQUAL >
< DBLOOKUP >
< LPAREN > { String sqlExpr; }
sqlExpr = quotedString()
< RPAREN > { jjtThis.preprocess(preprocessId, "dblookup", sqlExpr); }
< SEMICOLON >
) |
(
< BLACKLIST >
< LPAREN > { String blacklistDataSourceId; }
blacklistDataSourceId = quotedString()
< RPAREN > { jjtThis.preprocess("blacklist", blacklistDataSourceId); }
< SEMICOLON > ) }
// "set" operation (node MySet):
//   "set" "(" ( inputField | quotedString ) ( "," rule )* ")"
// The first argument becomes the value expression or the attribute of the node.
// Every following rule is parsed into a fresh Rules object and added with
// addAttributeRule. Each nested rule carries its own terminating ";" (see rule()).
void set(Rules r) #MySet : {Token expr; String value = "";}
{
{ jjtThis.setRule(r); }
< SET >
< LPAREN >
(
expr = inputField() { jjtThis.setValueExpression(expr.image); }
| value = quotedString() { jjtThis.setAttribute(value); }
)
(
< COMMA >
{
// the parameter variable is reused for the nested attribute rule
r = new Rules();
}
rule(r)
{
jjtThis.addAttributeRule(r);
}
)*
< RPAREN >
}
// "skipRecord" "(" ")" (node MySkip): binds the node to rule r and calls
// skipRecord() on it as soon as the keyword has been matched.
void skip(Rules r) #MySkip :{}
{
{ jjtThis.setRule(r); }
< SKIPRECORD > {jjtThis.skipRecord();}
< LPAREN >
< RPAREN > }
// "copy" operation (node MyCopy):
//   "copy" "(" quotedString "," quotedString "," quotedString ")"
// The three strings are, in order, the template match expression, the
// apply-templates select expression and the copy select expression.
void copy(Rules r) #MyCopy : {}
{ { jjtThis.setRule(r); }
< COPY >
< LPAREN > { String templateMatchExpression; String applyTemplateSelectExpression; String copySelectExpression; }
templateMatchExpression = quotedString()
< COMMA >
applyTemplateSelectExpression = quotedString()
< COMMA >
copySelectExpression = quotedString()
< RPAREN > { jjtThis.copy(templateMatchExpression, applyTemplateSelectExpression, copySelectExpression); }
}
// Operation on the right-hand side of a rule (node MyOp). The node is bound to
// rule r, then exactly one of the following forms is parsed and forwarded to
// the method of the same name on the node:
//   getValue(identifier, [attributes])
//   Convert(inputField, IDENTIFIER (, quotedString, quotedString)*)
//   Extract(IDENTIFIER)
//   RegExpr(inputField, inputField, quotedString)
//   split(inputField, quotedString, quotedString)
//   lookup(inputField, quotedString)
//   identifierExtract(singleQuotedString, inputField, singleQuotedString)
//   concat((quotedString | DOLLAR_QNAME) (, (quotedString | DOLLAR_QNAME))*)
void op(Rules r) #MyOp : {String functionName = ""; Token expr; Token expr2; Token vocab; String defaultPattern = ""; String function = ""; String elementName = ""; String regExpr; Token feature;}
{
{jjtThis.setRule(r);}
( (<GETVALUE>
<LPAREN>
functionName = identifier() <COMMA> attrib_list()
<RPAREN> {jjtThis.getValue(functionName);}
)
| (<CONVERT>
<LPAREN>
expr = inputField()
<COMMA>
vocab = <IDENTIFIER>
// optional (pattern, function) pair; if repeated, the last pair is the one passed on
(
<COMMA>
defaultPattern = quotedString()
<COMMA>
function = quotedString() )*
<RPAREN> {jjtThis.convert(expr.image, vocab.image, defaultPattern, function);}
)
| (< EXTRACT >
< LPAREN >
feature = < IDENTIFIER >
< RPAREN > {jjtThis.extract(feature.image); } )
| (<REGEXPR>
<LPAREN>
expr = inputField()
<COMMA>
expr2 = inputField()
<COMMA>
regExpr = quotedString()
<RPAREN> {jjtThis.regExpr(expr.image, expr2.image, regExpr);}
)
| (<SPLIT>
< LPAREN >
expr = inputField()
< COMMA >
elementName = quotedString()
< COMMA >
regExpr = quotedString()
< RPAREN > {jjtThis.split(expr.image, elementName, regExpr);}
)
| ( < LOOKUP > { String propertyKey; }
< LPAREN >
expr = inputField()
< COMMA >
propertyKey = quotedString()
< RPAREN > { jjtThis.lookup(expr.image, propertyKey); }
)
| ( < IDENTIFIEREXTRACT > { String xpathExprJsonString; Token xpathExprInputSource; }
< LPAREN >
// first argument: single-quoted JSON list of xpath expressions, e.g. '{//abc, //def }'
// (the assignment must stay outside the comment: the variable is read below)
xpathExprJsonString = singleQuotedString()
< COMMA >
// second argument: xpath expression of the input source
xpathExprInputSource = inputField()
< COMMA >
// third argument: regular expression
regExpr = singleQuotedString()
< RPAREN > { jjtThis.identifierExtract(xpathExprJsonString, xpathExprInputSource.image, regExpr); }
)
| ( < CONCAT > { jjtThis.concat(); }
< LPAREN > { String v; Token t; }
( v = quotedString() { jjtThis.addConcat(v); } | t = < DOLLAR_QNAME > { jjtThis.addConcat(t.image); }
)
(
< COMMA >
( v = quotedString() { jjtThis.addConcat(v); } | t = < DOLLAR_QNAME > { jjtThis.addConcat(t.image); }
) )*
< RPAREN > )
)
}
// Left-hand side naming an output field: identifier "=". Returns the identifier.
String leftExprOutputField() #void :{ String leftExpr; }
{
leftExpr = identifier() < EQUAL > { return leftExpr; }
}
// Left-hand side naming a variable: DOLLAR_QNAME "=". Returns the variable name including "$".
String leftExprVar() #void :{ String leftExpr; } {
leftExpr = variable() < EQUAL > { return leftExpr; } }
// Left-hand side naming a template: PERCENT_QNAME "=". Returns the template name including "%".
String leftExprTemplate() #void :{ String leftExpr; }
{
leftExpr = template() < EQUAL > { return leftExpr; } }
// Matches one SINGLE_QUOTE token and returns its image unchanged, i.e.
// including the surrounding apostrophes.
String singleQuotedString() #void : { Token t;} {
t =
< SINGLE_QUOTE >
{ return t.image; } }
// Matches one QUOTED_STRING token and returns its image unchanged, i.e.
// including the surrounding double quotes.
String quotedString() #void : {Token t;}
{
t =
< QUOTED_STRING >
{return t.image;}
}
// Parses one complete rule, terminated by SEMICOLON, into the given Rules object:
//   1. an optional rule declaration (ruleDecl()), stored via setRuleDeclaration;
//   2. a left-hand side, which is one of
//        - an output field: the text is split on "@"; the first part becomes the
//          target field and, if present, the second part becomes the attribute,
//        - a variable, stored via setVariable,
//        - a template, stored via setTemplate;
//   3. exactly one right-hand side: assign, op, set, copy, empty or skip.
// The LOOKAHEAD(2) is placed in front of the first alternative (assign) of the
// right-hand-side choice.
void rule(Rules r) #void : { String ruleDecl; String leftExpr;}
{
[ ruleDecl = ruleDecl() { r.setRuleDeclaration(ruleDecl); } ]
( leftExpr = leftExprOutputField()
{
String[] fieldArray = leftExpr.split("@");
r.setTargetField(fieldArray[0]);
if (fieldArray.length > 1) {
r.setAttribute(fieldArray[1]); }
}
| leftExpr = leftExprVar() { r.setVariable(leftExpr); }
| leftExpr = leftExprTemplate() { r.setTemplate(leftExpr); } )
(
LOOKAHEAD(2)
assign(r)
| op(r)
| set(r)
| copy(r)
| empty(r)
| skip(r)
) < SEMICOLON >
}
// Rule declaration prefix: matches the STATIC token (the only declaration
// accepted here) and returns its image.
String ruleDecl() #void : {Token t;}
{
(
t = < STATIC >
)
{ return t.image;
}
}
// Top-level production of a script, in this fixed order:
//   scriptDeclaration, an optional importDeclaration, any number of
//   nsDeclaration, any number of preprocess, any number of stmt or
//   conditionalStmt, and finally the END token followed by EOF.
void script() #void :{}
{
scriptDeclaration()
[ importDeclaration() ]
(
nsDeclaration()
)*
(
preprocess() )*
( stmt() | conditionalStmt() )*
( <END> <EOF> )
}
// Script header; produces a MyScript AST node (jjtThis).
// DECLARE_SCRIPT marks the node as MAINSCRIPT, DECLARE_SUBSCRIPT as SUBSCRIPT.
// The following quotedString() (stored via setScript, as returned by
// quotedString()) is the script name; the header ends with SEMICOLON.
// Note: the local variable scriptType is declared but never assigned or read.
void scriptDeclaration() #MyScript :
{String scriptName; ASTMyScript.SCRIPTTYPE scriptType;}
{
( < DECLARE_SCRIPT > {
jjtThis.setScriptType(ASTMyScript.SCRIPTTYPE.MAINSCRIPT); }
| < DECLARE_SUBSCRIPT > {
jjtThis.setScriptType(ASTMyScript.SCRIPTTYPE.SUBSCRIPT); } ) scriptName = quotedString() < SEMICOLON >
{
jjtThis.setScript(scriptName);
}
}
// A statement is a single rule; a fresh Rules instance is created for each
// statement and handed to rule(r) to be filled in.
void stmt() #void :{Rules r = new Rules();}
{
rule(r)
}
// Matches one STRING_LITERAL token and returns its image without the first
// and the last character (i.e. without the enclosing delimiters).
String string() #void : {Token t;}
{
t = <STRING_LITERAL> {return t.image.substring(1, t.image.length()-1);}
}
// Matches one DOLLAR_QNAME token and returns its image unchanged.
String variable() #void : { Token t; }
{
t = < DOLLAR_QNAME > { return t.image; }
}
// Matches one PERCENT_QNAME token and returns its image unchanged.
String template() #void : { Token t; }
{
t = < PERCENT_QNAME > { return t.image; } }

View File

@ -0,0 +1,67 @@
package eu.dnetlib.data.collective.transformation.rulelanguage.util;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;
/**
 * Static helpers that convert fragments of the rule language (label expressions,
 * <code>xpath:"..."</code> expressions, quoted strings, name-space maps) into the
 * plain strings used when generating XSL.
 *
 * @author jochen
 */
public class Converter {

    /** Prefix that introduces an xpath expression in a production rule. */
    private static final String xpathExpr = "xpath:\"";
    //private static final String labelExpr = "label:";

    /**
     * Builds an xpath expression of the form <code>//prefix:element</code> from a
     * dot-delimited label expression. The first token is used as name-space prefix
     * and the third token as element name; the second token is not used.
     * Empty tokens (consecutive dots) are skipped by the tokenizer.
     *
     * @param aElement label expression with at least three dot-delimited tokens
     * @return the xpath expression
     * @throws IllegalArgumentException if the expression has fewer than three tokens
     */
    public static String getXpathFromLabelExpr(String aElement) {
        StringTokenizer tokenizer = new StringTokenizer(aElement, ".");
        if (tokenizer.countTokens() < 3) {
            // previously this surfaced as a bare IndexOutOfBoundsException
            throw new IllegalArgumentException(
                    "label expression must consist of at least 3 dot-delimited tokens: " + aElement);
        }
        String namespacePrefix = tokenizer.nextToken();
        tokenizer.nextToken(); // the second token is not part of the xpath
        String elementName = tokenizer.nextToken();
        return "//" + namespacePrefix + ":" + elementName;
    }

    /**
     * extracts a xpath-expression made in a production rule
     *
     * @param aElement expression of the form <code>xpath:"..."</code>
     * @return the text between the prefix <code>xpath:"</code> and the last character
     *         of the argument; the empty string if the argument does not start with
     *         the prefix or consists of the prefix only
     */
    public static String getXpathFromXpathExpr(String aElement) {
        // the length guard avoids substring(7, 6) on an input that is just the prefix
        if (!aElement.startsWith(xpathExpr) || aElement.length() <= xpathExpr.length()) {
            return "";
        }
        return aElement.substring(xpathExpr.length(), aElement.length() - 1);
    }

    /**
     * Tells whether an xpath expression starts with the name of one of the functions
     * concat, normalize-space, translate or substring.
     *
     * @param aXpathExpr the xpath expression to inspect
     * @return true if the expression starts with one of these function names
     */
    public static boolean isXpathReturningString(String aXpathExpr) {
        String[] functions = {"concat", "normalize-space", "translate", "substring"};
        for (String fct : functions) {
            if (aXpathExpr.startsWith(fct)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Removes the first and the last character of the given value.
     *
     * @param aValue a value of at least two characters (e.g. a quoted string)
     * @return the value without its first and last character
     */
    public static String getUnquotedString(String aValue) {
        return aValue.substring(1, aValue.length() - 1);
    }

    /**
     * returns a list of name-space declarations used in xsl
     *
     * @param nsPrefixMap - a map of name-space prefixes and their uris
     * @return list of name-space declarations, each of the form
     *         <code>xmlns:prefix="uri" </code> (with a trailing blank)
     */
    public static List<String> getBoundPrefixes(Map<String, String> nsPrefixMap) {
        List<String> nsList = new LinkedList<String>();
        for (Map.Entry<String, String> binding : nsPrefixMap.entrySet()) {
            nsList.add("xmlns:" + binding.getKey() + "=" + "\"" + binding.getValue() + "\" ");
        }
        return nsList;
    }
}

View File

@ -0,0 +1,146 @@
package eu.dnetlib.data.collective.transformation.rulelanguage.util;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import org.apache.commons.lang3.StringEscapeUtils;
import eu.dnetlib.data.collective.transformation.core.xsl.XsltConstants;
import eu.dnetlib.data.collective.transformation.engine.functions.Convert;
import eu.dnetlib.data.collective.transformation.engine.functions.IdentifierExtract;
import eu.dnetlib.data.collective.transformation.engine.functions.Lookup;
import eu.dnetlib.data.collective.transformation.engine.functions.RegularExpression;
import eu.dnetlib.data.collective.transformation.engine.functions.Split;
import eu.dnetlib.data.collective.transformation.rulelanguage.Argument;
/**
 * Describes one call of an external function used by a rule: the function name,
 * its parameters (as map and as list), its arguments, a random UUID identifying
 * the call, and two flags (static, preprocess). It also renders the textual
 * call expressions that are placed into the generated XSL.
 *
 * TODO: make this class abstract and function classes (getValue, regexpr, ...) extending this class
 * @author jochen
 *
 */
public class FunctionCall {

    private String externalFunctionName;
    private Map<String, String> paramMap;
    private List<String> paramList;
    private String uuid;
    private List<Argument> argList = new LinkedList<Argument>();
    private boolean isStatic = false;
    private boolean doPreprocess = true;

    /**
     * Creates a function call with a freshly generated random UUID.
     * @param aIsStatic whether the call is static
     */
    public FunctionCall(boolean aIsStatic) {
        this.uuid = UUID.randomUUID().toString();
        this.isStatic = aIsStatic;
    }

    /**
     * @param aIsStatic whether the call is static
     * @param aDoPreprocess value returned by {@link #doPreprocess()}
     */
    public FunctionCall(boolean aIsStatic, boolean aDoPreprocess) {
        this(aIsStatic);
        this.doPreprocess = aDoPreprocess;
    }

    /**
     * @return the preprocess flag (true unless set otherwise in the constructor)
     */
    public boolean doPreprocess() {
        return this.doPreprocess;
    }

    /**
     * Common start of every rendered call: name-space prefix, function name,
     * opening parenthesis and the first argument $tf.
     */
    private String openCall() {
        return XsltConstants.extFuncNS + ":" + externalFunctionName + "(" + "$tf";
    }

    /**
     * @return the call expression that passes this call's uuid and $index
     */
    public String getXSLpreparatedFunctionCall() {
        return openCall() + ", '" + uuid + "', $index" + ")";
    }

    /**
     * @return the call expression that passes this call's uuid, $index and $posVar
     */
    public String getXSLpositionFunctionCall() {
        return openCall() + ", '" + uuid + "', $index" + ", $posVar" + ")";
    }

    /**
     * Renders a call expression that passes the parameters of this call directly.
     * Supported function names: regExpr, convert, convertString, split, lookup,
     * identifierExtract.
     *
     * @param aCallId call identifier; only used for the split function
     * @return the call expression
     * @throws IllegalStateException if the function name is not one of the supported names
     */
    public String getXSLdirectFunctionCall(String aCallId) {
        final String name = externalFunctionName;

        if (name.equals("regExpr")) {
            return openCall()
                    + ", " + this.paramMap.get(RegularExpression.paramExpr1)
                    + ", " + this.paramMap.get(RegularExpression.paramExpr2)
                    + ", '" + this.paramMap.get(RegularExpression.paramRegularExpr) + "')";
        }

        if (name.equals("convert") || name.equals("convertString")) {
            // both functions take the same parameters; default pattern and function
            // are only passed when both are present
            final boolean extended = this.paramMap.containsKey(Convert.paramDefaultPattern)
                    && this.paramMap.containsKey(Convert.paramFunction);
            String call = openCall()
                    + ", " + this.paramMap.get(Convert.paramFieldValue)
                    + ", '" + this.paramMap.get(Convert.paramVocabularyName) + "'";
            if (extended) {
                call = call
                        + ", '" + this.paramMap.get(Convert.paramDefaultPattern)
                        + "', '" + this.paramMap.get(Convert.paramFunction) + "'";
            }
            return call + ")";
        }

        if (name.equals("split")) {
            return openCall()
                    + ", " + this.paramMap.get(Split.paramInputExpr)
                    + ", '" + this.paramMap.get(Split.paramRegExpr)
                    + "', '" + aCallId + "')";
        }

        if (name.equals("lookup")) {
            return openCall()
                    + ", " + this.paramMap.get(Lookup.paramExprIdentifier)
                    + ", '" + this.paramMap.get(Lookup.paramExprProperty) + "')";
        }

        if (name.equals("identifierExtract")) {
            // the json list of xpath expressions and the regular expression are XML-escaped
            return openCall()
                    + ", '" + StringEscapeUtils.escapeXml10(this.paramMap.get(IdentifierExtract.paramXpathExprJson))
                    + "', " + this.paramMap.get(IdentifierExtract.paramXpathExprInSource)
                    + ", '" + StringEscapeUtils.escapeXml10(this.paramMap.get(IdentifierExtract.paramRegExpr)) + "')";
        }

        throw new IllegalStateException("unsupported function call: " + externalFunctionName);
    }

    /**
     * Renders a call expression that passes only a call identifier.
     * Only supported for the split function.
     *
     * @param aCallId call identifier
     * @return the call expression
     * @throws IllegalStateException if the function name is not split
     */
    public String getXSLdirectFunctionCallById(String aCallId) {
        if (!externalFunctionName.equals("split")) {
            throw new IllegalStateException("unsupported function call: " + externalFunctionName);
        }
        return openCall() + ", '" + aCallId + "')";
    }

    public void setExternalFunctionName(String externalFunctionName) {
        this.externalFunctionName = externalFunctionName;
    }

    public String getExternalFunctionName() {
        return externalFunctionName;
    }

    /**
     * Appends an argument to the argument list.
     */
    public void addArgument(Argument arg) {
        this.argList.add(arg);
    }

    /**
     * Replaces the argument list by the given list (no copy is made).
     */
    public void setArguments(List<Argument> aArgList) {
        this.argList = aArgList;
    }

    public List<Argument> getArguments() {
        return this.argList;
    }

    public void setParameters(Map<String, String> parameters) {
        this.paramMap = parameters;
    }

    /**
     * @return the parameter map; null until set
     */
    public Map<String, String> getParameters() {
        return paramMap;
    }

    /**
     * @return the random UUID generated when this call was created
     */
    public String getUuid() {
        return uuid;
    }

    /**
     * @param isStatic the isStatic to set
     */
    public void setStatic(boolean isStatic) {
        this.isStatic = isStatic;
    }

    /**
     * @return the isStatic
     */
    public boolean isStatic() {
        return isStatic;
    }

    /**
     * @return the paramList
     */
    public List<String> getParamList() {
        return paramList;
    }

    /**
     * @param paramList the paramList to set
     */
    public void setParamList(List<String> paramList) {
        this.paramList = paramList;
    }
}

View File

@ -0,0 +1,58 @@
package eu.dnetlib.data.collective.transformation.rulelanguage.visitor;
import eu.dnetlib.data.collective.transformation.rulelanguage.parser.ASTMyAssign;
import eu.dnetlib.data.collective.transformation.rulelanguage.parser.ASTMyAttribute;
import eu.dnetlib.data.collective.transformation.rulelanguage.parser.ASTMyOp;
import eu.dnetlib.data.collective.transformation.rulelanguage.parser.ASTStart;
import eu.dnetlib.data.collective.transformation.rulelanguage.parser.FtScriptVisitor;
import eu.dnetlib.data.collective.transformation.rulelanguage.parser.SimpleNode;
/**
 * Base class for visitors of the rule language syntax tree. Every visit method
 * implemented here does nothing and returns null, so subclasses only need to
 * override the node types they are interested in.
 */
public abstract class AbstractVisitor implements FtScriptVisitor {

    /**
     * No-op visit of an assignment node.
     * @return always null
     */
    @Override
    public Object visit(final ASTMyAssign node, final Object data) {
        return null;
    }

    /**
     * No-op visit of an attribute node.
     * @return always null
     */
    @Override
    public Object visit(final ASTMyAttribute node, final Object data) {
        return null;
    }

    /**
     * No-op visit of an operation node.
     * @return always null
     */
    @Override
    public Object visit(final ASTMyOp node, final Object data) {
        return null;
    }

    /**
     * No-op visit of the start node.
     * @return always null
     */
    @Override
    public Object visit(final ASTStart node, final Object data) {
        return null;
    }

    /**
     * No-op visit of a generic node.
     * @return always null
     */
    @Override
    public Object visit(final SimpleNode node, final Object data) {
        return null;
    }
}

View File

@ -0,0 +1,306 @@
package eu.dnetlib.data.collective.transformation.rulelanguage.visitor;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import eu.dnetlib.data.collective.transformation.rulelanguage.Argument;
import eu.dnetlib.data.collective.transformation.rulelanguage.Condition;
import eu.dnetlib.data.collective.transformation.rulelanguage.IRule;
import eu.dnetlib.data.collective.transformation.rulelanguage.Rules;
import eu.dnetlib.data.collective.transformation.rulelanguage.RulesSet;
import eu.dnetlib.data.collective.transformation.rulelanguage.Argument.Type;
import eu.dnetlib.data.collective.transformation.rulelanguage.parser.ASTMyAssign;
import eu.dnetlib.data.collective.transformation.rulelanguage.parser.ASTMyAttribute;
import eu.dnetlib.data.collective.transformation.rulelanguage.parser.ASTMyCondition;
import eu.dnetlib.data.collective.transformation.rulelanguage.parser.ASTMyCopy;
import eu.dnetlib.data.collective.transformation.rulelanguage.parser.ASTMyEmpty;
import eu.dnetlib.data.collective.transformation.rulelanguage.parser.ASTMyImport;
import eu.dnetlib.data.collective.transformation.rulelanguage.parser.ASTMyNs;
import eu.dnetlib.data.collective.transformation.rulelanguage.parser.ASTMyOp;
import eu.dnetlib.data.collective.transformation.rulelanguage.parser.ASTMyPreprocess;
import eu.dnetlib.data.collective.transformation.rulelanguage.parser.ASTMyScript;
import eu.dnetlib.data.collective.transformation.rulelanguage.parser.ASTMyScript.SCRIPTTYPE;
import eu.dnetlib.data.collective.transformation.rulelanguage.parser.ASTMySet;
import eu.dnetlib.data.collective.transformation.rulelanguage.parser.ASTMySkip;
import eu.dnetlib.data.collective.transformation.rulelanguage.util.Converter;
import eu.dnetlib.data.collective.transformation.rulelanguage.util.FunctionCall;
/**
 * Implementation of the visitor pattern; maps production rules into Java data structures.
 * While visiting the syntax tree it collects the script name and type, the rules
 * (per output element, per variable, per template), the function calls, the imported
 * scripts, the name-space declarations and the preprocessings.
 * @author jochen
 *
 */
public class RuleLanguageVisitor extends AbstractVisitor {

    private static final Log log = LogFactory.getLog(RuleLanguageVisitor.class);

    private String scriptName = "";
    private ASTMyScript.SCRIPTTYPE scriptType;
    private Map<String, Set<IRule>> elementMappingRules = new LinkedHashMap<String, Set<IRule>>();
    private Map<String, IRule> variableMappingRules = new LinkedHashMap<String, IRule>();
    private Map<String, IRule> templateMappingRules = new LinkedHashMap<String, IRule>();
    private List<String> importedScriptList = new LinkedList<String>();
    private List<FunctionCall> functionCallList = new LinkedList<FunctionCall>();
    private Map<String, String> namespaceDeclMap = new HashMap<String, String>();
    private List<Map<String, String>> preprocessingMap = new LinkedList<Map<String, String>>();

    /**
     * @return the name of the rule script
     */
    public String getScriptName() {
        return this.scriptName;
    }

    /**
     * @return the type of the script
     */
    public SCRIPTTYPE getScriptType() {
        return this.scriptType;
    }

    /**
     * @return the rules that neither define a variable nor a template, grouped by their unique name
     */
    public Map<String, Set<IRule>> getElementMappingRules() {
        return this.elementMappingRules;
    }

    /**
     * @return the rules that define a variable, by unique name
     */
    public Map<String, IRule> getVariableMappingRules() {
        return this.variableMappingRules;
    }

    /**
     * @return the rules that define a template, by unique name
     */
    public Map<String, IRule> getTemplateMappingRules() {
        return this.templateMappingRules;
    }

    /**
     * @return the list of function calls - this is a subset of the rule mapping
     */
    public List<FunctionCall> getFunctionCalls() {
        return this.functionCallList;
    }

    /**
     * @return the list of scripts that are declared as import
     */
    public List<String> getImportedScripts() {
        return this.importedScriptList;
    }

    /**
     * @return the map of name-space declarations made in the script
     */
    public Map<String, String> getNamespaceDeclarations() {
        return this.namespaceDeclMap;
    }

    /**
     * @return the preprocessings, one single-entry map (function name to parameter) per preprocess statement
     */
    public List<Map<String, String>> getPreprocessings() {
        return this.preprocessingMap;
    }

    /**
     * Visits an assignment. A field expression is applied to the rule as xpath or
     * assignment variable; an attribute value becomes the rule's constant. The rule
     * is only registered if its unique name is not blank.
     */
    @Override
    public Object visit(ASTMyAssign node, Object data) {
        final Rules rule = node.getRule();
        String constant = "";
        if (node.isFieldExpression()) {
            // todo e.g. convert field-expression into a xpath-expression
            applyValueExpression(rule, node.getFieldExpression());
        } else if (node.isAttribute()) {
            constant = node.getValue();
        } else {
            // shouldn't happen
            constant = "value not defined: " + node.getValue();
        }
        rule.setConstant(constant);
        if (rule.getUniqueName().trim().length() > 0) {
            addRule(rule, rule.getUniqueName());
        }
        return null;
    }

    /**
     * Visits a copy rule: stores the template match name and the two select
     * expressions in the rule and registers it.
     */
    @Override
    public Object visit(ASTMyCopy node, Object data) {
        final Rules rule = node.getRule();
        rule.setTemplateMatch(node.getTemplateMatchName());
        rule.getProperties().setProperty("applyTemplateSelectExpression", node.getApplyTemplateSelectExpression());
        rule.getProperties().setProperty("copySelectExpression", node.getCopySelectExpression());
        addRule(rule, rule.getUniqueName());
        return null;
    }

    /**
     * Visits a set rule: applies an optional value expression to the rule, attaches
     * a new rules set holding the node's rules as pending rules and registers the rule.
     */
    @Override
    public Object visit(ASTMySet aNode, Object aData) {
        log.debug("called method: RuleLanguageVisitor.visit(ASTMySet)");
        // check if the outputfield is the same in rules of this production
        final Rules owner = aNode.getRule();
        if (aNode.isValueExpression()) {
            // todo e.g. convert field-expression into a xpath-expression
            applyValueExpression(owner, aNode.getValueExpression());
        }
        final List<Rules> pending = aNode.getRules();
        final RulesSet rulesSet = new RulesSet();
        owner.setRulesSet(rulesSet);
        log.debug("rulelangvisitor rule name: " + owner.getUniqueName() + " , hasSet : " + owner.hasSet());
        rulesSet.getPendingRules().addAll(pending);
        addRule(owner, owner.getUniqueName());
        return null;
    }

    /**
     * Visits an empty rule: copies the node's empty flag into the rule and registers it.
     */
    @Override
    public Object visit(ASTMyEmpty node, Object data) {
        final Rules rule = node.getRule();
        rule.setEmpty(node.isEmpty());
        addRule(rule, rule.getUniqueName());
        return null;
    }

    /**
     * visit a production rule that is defined as an operation or external function call
     * @see eu.dnetlib.data.collective.transformation.rulelanguage.visitor.AbstractVisitor#visit(eu.dnetlib.data.collective.transformation.rulelanguage.parser.ASTMyOp, java.lang.Object)
     */
    @Override
    public Object visit(ASTMyOp node, Object data) {
        final Rules rule = node.getRule();
        rule.setFunctionCall(node.createFunctionCall(rule.isStatic()));
        final FunctionCall call = rule.getFunctionCall();
        functionCallList.add(call);
        log.debug("fc name: " + call.getExternalFunctionName());

        // every child node is an attribute that becomes one argument of the call
        final int childCount = node.jjtGetNumChildren();
        for (int index = 0; index < childCount; index++) {
            final Argument argument = toArgument((ASTMyAttribute) node.jjtGetChild(index));
            log.debug("argument: " + argument.getArgument());
            call.addArgument(argument);
        }

        final Map<String, String> parameters = call.getParameters();
        if (parameters != null) {
            for (Map.Entry<String, String> parameter : parameters.entrySet()) {
                log.debug("key: " + parameter.getKey() + " , value: " + parameter.getValue());
            }
        }
        log.debug("add rule with declaration: " + rule.getRuleDeclaration());
        addRule(rule, rule.getUniqueName());
        return null;
    }

    /**
     * Visits an import declaration: remembers the name of the imported script.
     */
    @Override
    public Object visit(ASTMyImport node, Object data) {
        importedScriptList.add(node.getScriptName());
        return null;
    }

    /**
     * Visits a name-space declaration: remembers the uri under its prefix.
     */
    @Override
    public Object visit(ASTMyNs node, Object data) {
        namespaceDeclMap.put(node.getNsPrefix(), node.getNsUri());
        return null;
    }

    /**
     * Visits the script declaration: takes over script name and script type.
     */
    @Override
    public Object visit(ASTMyScript node, Object data) {
        this.scriptName = node.getScript();
        this.scriptType = node.getScriptType();
        return null;
    }

    /**
     * Visits a conditional statement: builds one condition from the (optional) apply
     * expression and the conditional expression and links it with the primary and
     * the secondary rule of the node.
     */
    @Override
    public Object visit(ASTMyCondition node, Object data) {
        final Condition condition = new Condition();
        final String applyExpression = node.getApplyExpression();
        if (applyExpression.length() > 0) {
            condition.setApplyExpression(Converter.getXpathFromXpathExpr(applyExpression));
        }
        condition.setConditionExpression(Converter.getXpathFromXpathExpr(node.getConditionalExpression()));
        condition.setPrimaryRule(node.getPrimaryRule());
        node.getPrimaryRule().setCondition(condition);
        condition.setSecondaryRule(node.getSecondaryRule());
        node.getSecondaryRule().setCondition(condition);
        return null;
    }

    /**
     * Visits a preprocess statement: records function name and parameter as a
     * single-entry map.
     */
    @Override
    public Object visit(ASTMyPreprocess node, Object data) {
        final Map<String, String> preprocessing = new HashMap<String, String>();
        preprocessing.put(node.getFunctionName(), node.getParameter());
        preprocessingMap.add(preprocessing);
        return null;
    }

    /**
     * Visits a skip rule: marks the rule as skip and registers it.
     */
    @Override
    public Object visit(ASTMySkip node, Object data) {
        final Rules rule = node.getRule();
        rule.setSkip(true);
        addRule(rule, rule.getUniqueName()); // ??? actually no targetField defined
        return null;
    }

    /**
     * Applies the right-hand-side expression of an assignment or set rule:
     * an expression starting with "xpath:" is stored as xpath; an expression starting
     * with "$" but not with "$job." is stored as assignment variable; anything else
     * leaves the rule untouched.
     */
    private void applyValueExpression(Rules rule, String expression) {
        if (expression.startsWith("xpath:")) {
            rule.setXpath(Converter.getXpathFromXpathExpr(expression));
        } else if (expression.startsWith("$") && !expression.startsWith("$job.")) {
            // variable
            log.debug("ruleLangVisitor: assign variable:" + expression);
            rule.setAssignmentVariable(expression);
        }
    }

    /**
     * Creates the argument for one attribute node of an operation: a VALUE if the
     * attribute has a value; otherwise, depending on the input field, an INPUTFIELD
     * (xpath expression), a JOBCONST ("$job." prefix) or a VAR.
     * @throws IllegalStateException if the attribute has neither value nor input field
     */
    private Argument toArgument(ASTMyAttribute attribute) {
        if (attribute.getAttributeValue() != null) {
            return new Argument(Type.VALUE, attribute.getAttributeValue());
        }
        final String inputField = attribute.getAttributeInputField();
        if (inputField == null) {
            throw new IllegalStateException("Argument with neither value nor inputfield is not allowed.");
        }
        if (inputField.startsWith("xpath:")) {
            return new Argument(Type.INPUTFIELD, Converter.getXpathFromXpathExpr(inputField));
        }
        if (inputField.startsWith("$job.")) {
            // job constant
            return new Argument(Type.JOBCONST, inputField);
        }
        // variable
        return new Argument(Type.VAR, inputField);
    }

    /**
     * Registers a rule under the given key: variable-defining rules and
     * template-defining rules go into their own maps (one rule per key), all other
     * rules are added to the set of element rules for the key.
     */
    private void addRule(IRule rule, String key) {
        log.debug("add rule with key: " + key);
        if (rule.definesVariable()) {
            variableMappingRules.put(key, rule);
            return;
        }
        if (rule.definesTemplate()) {
            templateMappingRules.put(key, rule);
            return;
        }
        final Set<IRule> rulesForKey;
        if (elementMappingRules.containsKey(key)) {
            rulesForKey = elementMappingRules.get(key);
        } else {
            rulesForKey = new LinkedHashSet<IRule>();
            elementMappingRules.put(key, rulesForKey);
        }
        rulesForKey.add(rule);
    }
}

View File

@ -0,0 +1,30 @@
package eu.dnetlib.data.collective.transformation.utils;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.LinkedList;
import java.util.List;
/**
 * Reads a blacklist from a URL, one entry per line.
 */
public class BlacklistConsumer {

    /**
     * Opens the given URL and returns every line of the response as one blacklist entry.
     * The stream is always closed, also when reading fails. An empty response yields
     * an empty list.
     *
     * @param apiURL the URL of the blacklist api
     * @return the lines of the response, in the order they were read
     * @throws IllegalStateException if the URL is malformed or the response cannot be read;
     *         the underlying IOException is attached as cause
     */
    public List<String> getBlackList(String apiURL) {
        List<String> blacklist = new LinkedList<String>();
        try {
            URL blacklistApi = new URL(apiURL);
            // try-with-resources closes reader and stream on every exit path
            try (InputStream in = blacklistApi.openStream();
                    BufferedReader reader = new BufferedReader(new InputStreamReader(in))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    blacklist.add(line);
                }
            }
            // no debug output here: printing blacklist.get(0) used to fail on an empty blacklist
        } catch (IOException e) {
            throw new IllegalStateException("error in blacklist api: " + e.getMessage(), e);
        }
        return blacklist;
    }
}

View File

@ -0,0 +1,48 @@
package eu.dnetlib.data.collective.transformation.utils;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import javax.xml.XMLConstants;
import javax.xml.namespace.NamespaceContext;
/**
 * Simple map-backed {@link NamespaceContext}: prefixes are bound to name-space
 * uris with {@link #addNamespace(String, String)}.
 */
public class NamespaceContextImpl implements NamespaceContext {

    /** prefix to name-space uri */
    private Map<String, String> nsMap = new HashMap<String, String>();

    /**
     * Binds a prefix to a name-space uri; an existing binding of the prefix is replaced.
     * @param aPrefix the prefix
     * @param aURI the name-space uri
     */
    public void addNamespace(String aPrefix, String aURI) {
        nsMap.put(aPrefix, aURI);
    }

    /**
     * @param aPrefix the prefix to look up
     * @return the uri bound to the prefix, or null if the prefix was never added
     */
    @Override
    public String getNamespaceURI(String aPrefix) {
        return nsMap.get(aPrefix);
    }

    /**
     * @param aNamespaceURI the name-space uri to look up
     * @return "xml" / "xmlns" for the two reserved uris, otherwise one prefix bound
     *         to the uri, or null if no prefix is bound to it
     * @throws IllegalStateException if the uri is null (kept for backward compatibility;
     *         the NamespaceContext contract names IllegalArgumentException)
     */
    @Override
    public String getPrefix(String aNamespaceURI) {
        if (aNamespaceURI == null) {
            throw new IllegalStateException();
        }
        if (aNamespaceURI.equals(XMLConstants.XML_NS_URI)) {
            return XMLConstants.XML_NS_PREFIX;
        }
        if (aNamespaceURI.equals(XMLConstants.XMLNS_ATTRIBUTE_NS_URI)) {
            return XMLConstants.XMLNS_ATTRIBUTE;
        }
        // single pass over the bindings; comparing from the argument's side is null-safe
        for (Map.Entry<String, String> binding : nsMap.entrySet()) {
            if (aNamespaceURI.equals(binding.getValue())) {
                return binding.getKey();
            }
        }
        return null;
    }

    /**
     * @param aNamespaceURI the name-space uri to look up
     * @return a read-only iterator over all prefixes bound to the uri ("xml" / "xmlns"
     *         for the two reserved uris); empty if no prefix is bound, never null
     * @throws IllegalArgumentException if the uri is null
     */
    @Override
    public Iterator<String> getPrefixes(String aNamespaceURI) {
        if (aNamespaceURI == null) {
            throw new IllegalArgumentException("namespace uri must not be null");
        }
        java.util.List<String> prefixes = new java.util.ArrayList<String>();
        if (aNamespaceURI.equals(XMLConstants.XML_NS_URI)) {
            prefixes.add(XMLConstants.XML_NS_PREFIX);
        } else if (aNamespaceURI.equals(XMLConstants.XMLNS_ATTRIBUTE_NS_URI)) {
            prefixes.add(XMLConstants.XMLNS_ATTRIBUTE);
        } else {
            for (Map.Entry<String, String> binding : nsMap.entrySet()) {
                if (aNamespaceURI.equals(binding.getValue())) {
                    prefixes.add(binding.getKey());
                }
            }
        }
        // unmodifiable view: the contract states that Iterator.remove() is not supported
        return java.util.Collections.unmodifiableList(prefixes).iterator();
    }
}

View File

@ -0,0 +1,88 @@
package eu.dnetlib.data.collective.transformation.utils;
import java.io.StringReader;
import java.util.List;
import org.apache.commons.lang3.StringEscapeUtils;
import eu.dnetlib.common.profile.ProfileNotFoundException;
import eu.dnetlib.data.collective.transformation.rulelanguage.RuleLanguageParser;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import eu.dnetlib.enabling.tools.ServiceLocator;
/**
 * Loads transformation rule scripts from transformation rule profiles via the
 * lookup service and feeds them, together with the scripts they import, into a
 * {@link RuleLanguageParser}.
 *
 * @author jochen
 * @since 1.2
 */
public class TransformationRulesImportTool {

    private ServiceLocator<ISLookUpService> lookupServiceLocator;

    /**
     * retrieves the transformation rule script of a transformation rule profile identified by a profile id
     * @param aProfileId
     * @return list of the transformation rule script and optionally profile id's of subscripts
     * @throws ProfileNotFoundException if the query returns nothing or the lookup fails
     */
    protected List<String> getScript(String aProfileId) throws ProfileNotFoundException {
        // NOTE(review): aProfileId is concatenated into the xquery unescaped; confirm that
        // callers only pass trusted profile ids.
        String xquery = "collection('/db/DRIVER/TransformationRuleDSResources')//RESOURCE_PROFILE[.//RESOURCE_IDENTIFIER/@value ='" +
                aProfileId + "']//CODE/child::node(), " +
                "for $id in (collection('/db/DRIVER/TransformationRuleDSResources')//RESOURCE_PROFILE[.//RESOURCE_IDENTIFIER/@value ='" +
                aProfileId + "']//IMPORTED/SCRIPT_REFERENCE/@id) return string($id)";
        List<String> queryResult;
        try {
            queryResult = lookupServiceLocator.getService().quickSearchProfile(xquery);
        } catch (ISLookUpException e) {
            throw new ProfileNotFoundException(e);
        }
        if (queryResult.isEmpty()) {
            throw new ProfileNotFoundException("no script found in profile for profileId: " + aProfileId);
        }
        return queryResult;
    }

    /**
     * Loads the script of the given profile into the parser. A script starting with
     * <code>&lt;xsl:stylesheet</code> is handed over as stylesheet; any other script is
     * parsed as rule language, and the rules of every referenced sub-script are loaded
     * recursively with a parser of their own and added to the given parser.
     *
     * @param aParser the parser receiving the rules
     * @param aProfileId id of the transformation rules profile
     * @throws ProfileNotFoundException if the profile or one of its sub-scripts cannot be retrieved
     * @throws IllegalStateException if the number of imports declared in the script differs
     *         from the number of script references in the profile
     */
    protected void importRules(RuleLanguageParser aParser, String aProfileId) throws ProfileNotFoundException {
        List<String> profileQueryResult = getScript(aProfileId);
        String script = StringEscapeUtils.unescapeXml(profileQueryResult.get(0)); // the first entry contains the script
        if (script.trim().startsWith("<xsl:stylesheet")) {
            aParser.setXslStylesheet(script.trim());
            return;
        }

        StringReader reader = new StringReader(script);
        aParser.parse(reader);

        // all entries after the first one are profile ids of referenced sub-scripts
        int declaredImports = aParser.getImportedScripts().size();
        int referencedScripts = profileQueryResult.size() - 1;
        if (declaredImports != referencedScripts) {
            // report the two numbers that were actually compared
            throw new IllegalStateException(
                    "invalid number of scripts to import: " + declaredImports + " != " + referencedScripts);
        }

        // recursive; the sub-list is empty when nothing is imported
        for (String importScriptProfileId : profileQueryResult.subList(1, profileQueryResult.size())) {
            RuleLanguageParser childParser = new RuleLanguageParser();
            importRules(childParser, importScriptProfileId);
            aParser.addRulesFromParser(childParser);
        }
    }

    /**
     * gets the rule language parser, which creates the mapping of rules which is defined in a transformation rule script identified by the transformation rule profile id
     * @param aProfileId - id of the transformation rules profile
     * @return the rule language parser
     * @throws ProfileNotFoundException
     */
    public RuleLanguageParser getRuleLanguageParser(String aProfileId) throws ProfileNotFoundException {
        RuleLanguageParser parser = new RuleLanguageParser();
        importRules(parser, aProfileId);
        return parser;
    }

    public ServiceLocator<ISLookUpService> getLookupServiceLocator() {
        return lookupServiceLocator;
    }

    public void setLookupServiceLocator(
            ServiceLocator<ISLookUpService> lookupServiceLocator) {
        this.lookupServiceLocator = lookupServiceLocator;
    }
}

View File

@ -0,0 +1,41 @@
package eu.dnetlib.data.transformation.service;
import javax.annotation.Resource;
import eu.dnetlib.common.profile.ResourceDao;
import eu.dnetlib.data.collective.transformation.VocabularyRegistry;
import eu.dnetlib.data.collective.transformation.utils.TransformationRulesImportTool;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpDocumentNotFoundException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
/**
 * Creates {@link SimpleDataTransformer} instances and wires them with the
 * collaborators injected into this factory.
 */
public class DataTransformerFactory {

    @Resource(name = "vocabularyRegistry")
    private VocabularyRegistry vocabularyRegistry;

    @Resource(name = "transformationTemplate")
    private org.springframework.core.io.Resource transformationTemplate;

    @Resource(name = "defaultSchema")
    private org.springframework.core.io.Resource defaultSchema;

    @Resource(name = "transformationRuleProfileUtil")
    private TransformationRulesImportTool transformationRuleProfileUtil;

    @Resource(name = "resourceDao")
    private ResourceDao resourceDao;

    @Resource(name = "blacklistApi")
    private org.springframework.core.io.Resource blacklistApi;

    /**
     * Creates a transformer for the given rule id and sets up its engine.
     *
     * @param ruleid id of the transformation rule profile
     * @return the configured transformer
     * @throws IllegalStateException if setting up the engine fails for any reason;
     *         the original exception is attached as cause
     */
    public SimpleDataTransformer createTransformer(final String ruleid) throws ISLookUpDocumentNotFoundException, ISLookUpException {
        final SimpleDataTransformer configured = new SimpleDataTransformer(ruleid);
        try {
            configured.setupEngine(
                    vocabularyRegistry,
                    transformationTemplate,
                    defaultSchema,
                    transformationRuleProfileUtil,
                    resourceDao,
                    blacklistApi);
        } catch (Exception setupFailure) {
            throw new IllegalStateException(setupFailure);
        }
        return configured;
    }
}

View File

@ -0,0 +1,96 @@
package eu.dnetlib.data.transformation.service;
import javax.xml.transform.TransformerConfigurationException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.springframework.core.io.Resource;
import eu.dnetlib.common.profile.ProfileNotFoundException;
import eu.dnetlib.common.profile.ResourceDao;
import eu.dnetlib.data.collective.transformation.VocabularyRegistry;
import eu.dnetlib.data.collective.transformation.engine.SimpleTransformationEngine;
import eu.dnetlib.data.collective.transformation.engine.core.TransformationImpl;
import eu.dnetlib.data.collective.transformation.utils.TransformationRulesImportTool;
import eu.dnetlib.miscutils.functional.UnaryFunction;
/**
 * Transforms one record (String to String) by delegating to a
 * {@link SimpleTransformationEngine} that is configured in
 * {@link #setupEngine(VocabularyRegistry, Resource, Resource, TransformationRulesImportTool, ResourceDao, Resource)}.
 */
public class SimpleDataTransformer implements UnaryFunction<String, String> {

    /**
     * logger.
     */
    private static final Log log = LogFactory.getLog(SimpleDataTransformer.class);

    /** line logged before and after every debug message of this class */
    private static final String FRAME = "************************************************************";

    /**
     * Transformation rule profile
     */
    private String ruleProfile;

    private SimpleTransformationEngine transformationEngine;

    /**
     * @param ruleProfile the transformation rule profile this transformer is created for
     */
    public SimpleDataTransformer(final String ruleProfile) {
        this.ruleProfile = ruleProfile;
        // TODO
        // instantiate here the xml transformer
        if (log.isDebugEnabled()) {
            debugFramed("New transformer created from profile " + ruleProfile);
        }
    }

    /**
     * Transforms the record with the configured engine; input and output are
     * logged on debug level.
     *
     * @param record the input record
     * @return the transformed record
     */
    @Override
    public String evaluate(String record) {
        if (log.isDebugEnabled()) {
            debugFramed("INPUT: " + record);
        }
        final String transformed = transform(record);
        if (log.isDebugEnabled()) {
            debugFramed("OUTPUT: " + transformed);
        }
        return transformed;
    }

    /**
     * Delegates to the transformation engine.
     */
    private String transform(String record) {
        // use here the xml transformer
        return transformationEngine.transform(record);
    }

    /**
     * Logs the message on debug level, framed by a line of asterisks before and after.
     */
    private static void debugFramed(String message) {
        log.debug(FRAME);
        log.debug(message);
        log.debug(FRAME);
    }

    /**
     * Creates the transformation engine of this transformer and configures it:
     * a transformation is built from schema and template, initialised, given the
     * rule language parser for this transformer's rule profile and configured;
     * finally transformation, resource dao and blacklist api are set on the engine.
     */
    protected void setupEngine(VocabularyRegistry vocabularyRegistry, Resource transformationTemplate,
            Resource defaultSchema, TransformationRulesImportTool rulesProfileUtil, ResourceDao resourceDao, Resource blacklistApi) throws TransformerConfigurationException, ProfileNotFoundException {
        final SimpleTransformationEngine engine = new SimpleTransformationEngine();
        // the field is assigned before configuration starts, as before
        transformationEngine = engine;
        engine.setVocabularyRegistry(vocabularyRegistry);

        final TransformationImpl rulesTransformation = new TransformationImpl();
        rulesTransformation.setSchema(defaultSchema);
        rulesTransformation.setTemplate(transformationTemplate);
        rulesTransformation.init();
        if (log.isDebugEnabled()) {
            debugFramed(ruleProfile);
        }
        rulesTransformation.setRuleLanguageParser(rulesProfileUtil.getRuleLanguageParser(ruleProfile));
        rulesTransformation.configureTransformation();

        engine.setTransformation(rulesTransformation);
        engine.setResourceDao(resourceDao);
        engine.setBlacklistApi(blacklistApi);
    }

    public String getRuleProfile() {
        return ruleProfile;
    }

    public void setRuleProfile(String ruleProfile) {
        this.ruleProfile = ruleProfile;
    }
}

View File

@ -0,0 +1,37 @@
package eu.dnetlib.data.transformation.service;
import javax.annotation.Resource;
import javax.xml.ws.wsaddressing.W3CEndpointReference;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import eu.dnetlib.data.transformation.service.rmi.TransformationService;
import eu.dnetlib.data.transformation.service.rmi.TransformationServiceException;
import eu.dnetlib.enabling.resultset.MappedResultSetFactory;
import eu.dnetlib.enabling.tools.AbstractBaseService;
/**
 * Transformation service: wraps an input result set into a mapped result set
 * whose records are transformed with the transformer created for a rule id.
 */
public class TransformationServiceImpl extends AbstractBaseService implements TransformationService {

    /**
     * logger.
     */
    private static final Log log = LogFactory.getLog(TransformationServiceImpl.class);

    @Resource
    private MappedResultSetFactory mappedResultSetFactory;

    @Resource
    private DataTransformerFactory dataTransformerFactory;

    /**
     * @param ruleid id of the transformation rule used to create the transformer
     * @param epr endpoint reference of the input result set
     * @return endpoint reference of the mapped result set
     * @throws TransformationServiceException if creating the transformer or the mapped
     *         result set fails; the failure is logged and attached as cause
     */
    @Override
    public W3CEndpointReference transform(String ruleid, W3CEndpointReference epr) throws TransformationServiceException {
        try {
            final SimpleDataTransformer recordTransformer = dataTransformerFactory.createTransformer(ruleid);
            return mappedResultSetFactory.createMappedResultSet(epr, recordTransformer);
        } catch (Exception e) {
            final String message = "Error generating mapped resultset - ruleId: " + ruleid;
            log.error(message, e);
            throw new TransformationServiceException(message, e);
        }
    }
}

View File

@ -0,0 +1,165 @@
package prototype;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.text.Normalizer;
import java.util.List;
import java.util.Set;
import prototype.utils.Capitalize;
import prototype.utils.DotAbbreviations;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.hash.Hashing;
//import eu.dnetlib.pace.clustering.NGramUtils;
//import eu.dnetlib.pace.util.Capitalise;
//import eu.dnetlib.pace.util.DotAbbreviations;
/**
 * Parses a free-text personal name into given-name tokens, surname tokens and full-name tokens.
 * <p>
 * The input is NFD-normalised and stripped of bracketed text, digits and punctuation (other than
 * {@code ,} and {@code -}). It is then interpreted either as {@code "surname, name"} when a comma is
 * present, or by heuristics on initials and upper-case terms otherwise.
 * <p>
 * Unlike {@code PersonOrig}, this class does not blank out {@code \W} characters or combining
 * diacritical marks, so non-ASCII letters survive the clean-up.
 */
public class Person {
    private List<String> name = Lists.newArrayList();
    private List<String> surname = Lists.newArrayList();
    private List<String> fullname = Lists.newArrayList();

    /**
     * Parses the given raw name.
     * <p>
     * Input that contains no usable term (empty, or only digits/punctuation/whitespace) yields an
     * instance with empty name, surname and fullname lists instead of failing.
     *
     * @param s the raw name string
     * @throws NullPointerException if {@code s} is {@code null} (raised by {@link Normalizer#normalize})
     */
    public Person(String s) {
        s = Normalizer.normalize(s, Normalizer.Form.NFD);
        // Drop bracketed content; the patterns are greedy, so everything between the first opening
        // and the last closing bracket of a kind is removed.
        s = s.replaceAll("\\(.+\\)", "");
        s = s.replaceAll("\\[.+\\]", "");
        s = s.replaceAll("\\{.+\\}", "");
        // "a - b" becomes "a-b" so hyphenated names stay a single term.
        s = s.replaceAll("\\s+-\\s+", "-");
        s = s.replaceAll("[\\p{Punct}&&[^-,]]", " ");
        s = s.replaceAll("\\d", " ");
        s = s.replaceAll("\\n", " ");
        s = s.replaceAll("\\.", " ");
        s = s.replaceAll("\\s+", " ");

        if (s.contains(",")) {
            // "surname, name" form: only the first two comma-separated segments are considered.
            String[] arr = s.split(",");
            if (arr.length == 1) {
                fullname = splitTerms(arr[0]);
            } else if (arr.length > 1) {
                surname = splitTerms(arr[0]);
                name = splitTermsFirstName(arr[1]);
                fullname.addAll(surname);
                fullname.addAll(name);
            }
        } else {
            fullname = splitTerms(s);

            // Position of the last single-character term (an initial); stays at size() when none exists.
            int lastInitialPosition = fullname.size();
            boolean hasSurnameInUpperCase = false;
            for (int i = 0; i < fullname.size(); i++) {
                String term = fullname.get(i);
                if (term.length() == 1) {
                    lastInitialPosition = i;
                } else if (term.equals(term.toUpperCase())) {
                    hasSurnameInUpperCase = true;
                }
            }

            // name/surname are stored as independent copies rather than subList views, so a later
            // structural change of fullname (e.g. through getFullname()) cannot invalidate them.
            if (lastInitialPosition < fullname.size() - 1) { // Case: Michele G. Artini
                name = Lists.newArrayList(fullname.subList(0, lastInitialPosition + 1));
                surname = Lists.newArrayList(fullname.subList(lastInitialPosition + 1, fullname.size()));
            } else if (hasSurnameInUpperCase) { // Case: Michele ARTINI
                for (String term : fullname) {
                    if (term.length() > 1 && term.equals(term.toUpperCase())) {
                        surname.add(term);
                    } else {
                        name.add(term);
                    }
                }
            } else if (!fullname.isEmpty() && lastInitialPosition == fullname.size()) {
                // Case: no initial and no upper-case term: the last term is taken as the surname.
                // The emptiness guard prevents subList(-1, 0) when the input had no terms at all.
                int surnameStart = lastInitialPosition - 1;
                surname = Lists.newArrayList(fullname.subList(surnameStart, fullname.size()));
                name = Lists.newArrayList(fullname.subList(0, surnameStart));
            }
        }
    }

    /**
     * Splits the first-name segment of a {@code "surname, name"} input into terms.
     * <p>
     * When the whole trimmed segment consists of two or three upper-case letters (e.g. {@code "JM"}),
     * each letter becomes its own term; otherwise the segment is split on spaces.
     *
     * @param s the first-name segment
     * @return the list of first-name terms, never {@code null}
     */
    private List<String> splitTermsFirstName(String s) {
        String trimmed = s.trim();
        if (!trimmed.matches("\\p{Lu}{2,3}")) {
            return splitTerms(s);
        }
        List<String> list = Lists.newArrayList();
        // Split in front of every upper-case letter (Unicode category Lu).
        for (String p : trimmed.split("(?=\\p{Lu})")) {
            if (!p.isEmpty()) {
                list.add(p);
            }
        }
        return list;
    }

    /**
     * Splits a string on single spaces, dropping empty pieces.
     *
     * @param s the string to split
     * @return a new mutable list with the non-empty terms, in order
     */
    private List<String> splitTerms(String s) {
        List<String> list = Lists.newArrayList();
        for (String part : Splitter.on(" ").omitEmptyStrings().split(s)) {
            list.add(part);
        }
        return list;
    }

    /**
     * @return the given-name terms; empty when they could not be determined
     */
    public List<String> getName() {
        return name;
    }

    /**
     * @return the surname terms; empty when they could not be determined
     */
    public List<String> getSurname() {
        return surname;
    }

    /**
     * @return all terms of the cleaned-up name
     */
    public List<String> getFullname() {
        return fullname;
    }

    /**
     * @return the murmur3 128-bit hash of {@link #getNormalisedFullname()} (UTF-8), as a string
     */
    public String hash() {
        return Hashing.murmur3_128().hashString(getNormalisedFullname(), StandardCharsets.UTF_8).toString();
    }

    /**
     * Builds the normalised form of the name.
     *
     * @return {@code "<surname terms>, <name terms with abbreviations>"} when {@link #isAccurate()}
     *         holds, otherwise the full-name terms joined by single spaces. Surname terms are emitted
     *         as parsed; {@link #getCapitalSurname()} is not applied here.
     */
    public String getNormalisedFullname() {
        return isAccurate() ?
                Joiner.on(" ").join(getSurname()) + ", " + Joiner.on(" ").join(getNameWithAbbreviations()) :
                Joiner.on(" ").join(fullname);
    }

    /**
     * @return a new list with {@code Capitalize} applied to every surname term
     */
    public List<String> getCapitalSurname() {
        return Lists.newArrayList(Iterables.transform(surname, new Capitalize()));
    }

    /**
     * @return a new list with {@code DotAbbreviations} applied to every given-name term
     */
    public List<String> getNameWithAbbreviations() {
        return Lists.newArrayList(Iterables.transform(name, new DotAbbreviations()));
    }

    /**
     * @return {@code true} when both name and surname terms are non-null and non-empty
     */
    public boolean isAccurate() {
        return (name != null && surname != null && !name.isEmpty() && !surname.isEmpty());
    }
}

View File

@ -0,0 +1,129 @@
package prototype;
import java.nio.charset.StandardCharsets;
import java.text.Normalizer;
import java.util.List;
import java.util.Set;
import prototype.utils.Capitalize;
import prototype.utils.DotAbbreviations;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.hash.Hashing;
//import eu.dnetlib.pace.clustering.NGramUtils;
//import eu.dnetlib.pace.util.Capitalise;
//import eu.dnetlib.pace.util.DotAbbreviations;
/**
 * Splits a free-text personal name into given-name, surname and full-name terms.
 * <p>
 * The raw string is NFD-normalised and cleaned with a fixed sequence of regular-expression
 * replacements. It is then read as {@code "surname, name"} when it contains a comma, or analysed for
 * initials and upper-case terms otherwise.
 */
public class PersonOrig {
    private List<String> name = Lists.newArrayList();
    private List<String> surname = Lists.newArrayList();
    private List<String> fullname = Lists.newArrayList();
    private static Set<String> particles = null;

    /**
     * Parses the given raw name.
     *
     * @param s the raw name string
     */
    public PersonOrig(String s) {
        // The replacements are applied strictly in this order.
        final String cleaned = Normalizer.normalize(s, Normalizer.Form.NFD)
                .replaceAll("\\(.+\\)", "")
                .replaceAll("\\[.+\\]", "")
                .replaceAll("\\{.+\\}", "")
                .replaceAll("\\s+-\\s+", "-")
                .replaceAll("[\\W&&[^,-]]", " ")
                .replaceAll("[\\p{InCombiningDiacriticalMarks}&&[^,-]]", " ")
                .replaceAll("[\\p{Punct}&&[^-,]]", " ")
                .replaceAll("\\d", " ")
                .replaceAll("\\n", " ")
                .replaceAll("\\.", " ")
                .replaceAll("\\s+", " ");

        if (cleaned.contains(",")) {
            parseCommaSeparated(cleaned);
        } else {
            parseFreeText(cleaned);
        }
    }

    /**
     * Handles the {@code "surname, name"} form; segments after the second one are not read.
     */
    private void parseCommaSeparated(String cleaned) {
        final String[] segments = cleaned.split(",");
        if (segments.length == 1) {
            fullname = splitTerms(segments[0]);
            return;
        }
        if (segments.length > 1) {
            surname = splitTerms(segments[0]);
            name = splitTerms(segments[1]);
            fullname.addAll(surname);
            fullname.addAll(name);
        }
    }

    /**
     * Handles input without a comma by looking for single-character terms (initials) and
     * fully upper-case terms.
     */
    private void parseFreeText(String cleaned) {
        fullname = splitTerms(cleaned);
        final int termCount = fullname.size();

        // Index of the last one-character term; remains termCount when there is none.
        int lastInitial = termCount;
        boolean upperCaseSeen = false;
        int index = 0;
        for (String token : fullname) {
            if (token.length() == 1) {
                lastInitial = index;
            } else if (token.equals(token.toUpperCase())) {
                upperCaseSeen = true;
            }
            index++;
        }

        if (lastInitial < termCount - 1) { // Case: Michele G. Artini
            name = fullname.subList(0, lastInitial + 1);
            surname = fullname.subList(lastInitial + 1, termCount);
            return;
        }
        if (!upperCaseSeen) {
            return;
        }
        // Case: Michele ARTINI
        for (String token : fullname) {
            final boolean upperCaseWord = token.length() > 1 && token.equals(token.toUpperCase());
            (upperCaseWord ? surname : name).add(token);
        }
    }

    /**
     * Splits on single spaces and discards empty pieces.
     *
     * @param s the string to split
     * @return a new mutable list of the non-empty terms, in order
     */
    private List<String> splitTerms(String s) {
        return Lists.newArrayList(Splitter.on(" ").omitEmptyStrings().split(s));
    }

    /**
     * @return the given-name terms
     */
    public List<String> getName() {
        return name;
    }

    /**
     * @return the surname terms
     */
    public List<String> getSurname() {
        return surname;
    }

    /**
     * @return all terms of the cleaned-up name
     */
    public List<String> getFullname() {
        return fullname;
    }

    /**
     * @return the murmur3 128-bit hash of {@link #getNormalisedFullname()} (UTF-8), as a string
     */
    public String hash() {
        final String normalised = getNormalisedFullname();
        return Hashing.murmur3_128().hashString(normalised, StandardCharsets.UTF_8).toString();
    }

    /**
     * @return {@code "<capitalised surname terms>, <name terms with abbreviations>"} when
     *         {@link #isAccurate()} holds, otherwise the full-name terms joined by single spaces
     */
    public String getNormalisedFullname() {
        final Joiner bySpace = Joiner.on(" ");
        if (isAccurate()) {
            return bySpace.join(getCapitalSurname()) + ", " + bySpace.join(getNameWithAbbreviations());
        }
        return bySpace.join(fullname);
    }

    /**
     * @return a new list with {@code Capitalize} applied to every surname term
     */
    public List<String> getCapitalSurname() {
        return Lists.newArrayList(
                Iterables.transform(surname, new Capitalize()));
    }

    /**
     * @return a new list with {@code DotAbbreviations} applied to every given-name term
     */
    public List<String> getNameWithAbbreviations() {
        return Lists.newArrayList(
                Iterables.transform(name, new DotAbbreviations()));
    }

    /**
     * @return {@code true} when both name and surname terms are non-null and non-empty
     */
    public boolean isAccurate() {
        return !(name == null || surname == null || name.isEmpty() || surname.isEmpty());
    }
}

Some files were not shown because too many files have changed in this diff Show More