From 22d126ffa8d74aaff631a1d77e1a8242756f7662 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Fri, 7 Jun 2019 17:38:50 +0200 Subject: [PATCH] importer dnet-data-transformation-service and unibi-data-collective-transformation-common in dnet-core-components, the transformation inspector was left behind for the moment --- dnet-data-services/pom.xml | 13 + .../dnetlib/common/profile/DnetResource.java | 58 + .../eu/dnetlib/common/profile/IResource.java | 11 + .../dnetlib/common/profile/IResourceDao.java | 17 + .../common/profile/IResourceDaoSupport.java | 12 + .../profile/ProfileNotFoundException.java | 30 + .../eu/dnetlib/common/profile/Resource.java | 31 + .../dnetlib/common/profile/ResourceCache.java | 90 + .../dnetlib/common/profile/ResourceDao.java | 42 + .../profile/ResourceDaoRemoteSupport.java | 119 + .../eu/dnetlib/common/utils/EprUtils.java | 245 ++ .../eu/dnetlib/common/utils/XMLException.java | 19 + .../dnetlib/common/utils/XMLSerializer.java | 45 + .../eu/dnetlib/common/utils/XMLUtils.java | 54 + .../transformation/IDatabaseConnector.java | 20 + .../TransformationException.java | 44 + .../transformation/VocabularyMap.java | 38 + .../transformation/VocabularyRegistry.java | 96 + .../transformation/VocabularyTypeEditor.java | 25 + .../transformation/core/schema/Namespace.java | 47 + .../core/schema/SchemaAttribute.java | 42 + .../core/schema/SchemaElement.java | 160 ++ .../core/schema/SchemaInspector.java | 98 + .../core/schema/visitor/Visitor.java | 168 ++ .../visitor/XSContentTypeVisitorImpl.java | 82 + .../schema/visitor/XSTermVisitorImpl.java | 56 + .../core/xsl/AbstractXslElement.java | 48 + .../core/xsl/XslConstructor.java | 406 ++++ .../transformation/core/xsl/XslElement.java | 43 + .../core/xsl/XsltConstants.java | 32 + .../xsl/ext/TransformationFunctionProxy.java | 345 +++ .../engine/FunctionResults.java | 67 + .../transformation/engine/PreProcessor.java | 240 ++ .../engine/SimpleTransformationEngine.java | 409 ++++ 
.../engine/core/ITransformation.java | 54 + .../engine/core/StylesheetBuilder.java | 335 +++ .../engine/core/TransformationImpl.java | 353 +++ .../AbstractTransformationFunction.java | 12 + .../engine/functions/Convert.java | 74 + .../engine/functions/DateVocabulary.java | 108 + .../engine/functions/Dblookup.java | 72 + .../engine/functions/Extract.java | 50 + .../engine/functions/IFeatureExtraction.java | 25 + .../functions/ITransformationFunction.java | 5 + .../engine/functions/IVocabulary.java | 31 + .../engine/functions/IdentifierExtract.java | 114 + .../engine/functions/Lookup.java | 34 + .../engine/functions/LookupRecord.java | 33 + .../engine/functions/PersonVocabulary.java | 26 + .../engine/functions/ProcessingException.java | 46 + .../engine/functions/RegularExpression.java | 60 + .../engine/functions/RetrieveValue.java | 157 ++ .../engine/functions/Split.java | 86 + .../engine/functions/Vocabulary.java | 209 ++ .../transformation/rulelanguage/Argument.java | 38 + .../rulelanguage/Condition.java | 76 + .../transformation/rulelanguage/IRule.java | 27 + .../rulelanguage/RuleLanguageParser.java | 129 ++ .../transformation/rulelanguage/Rules.java | 316 +++ .../transformation/rulelanguage/RulesSet.java | 29 + .../rulelanguage/parser/ASTMyAssign.java | 59 + .../rulelanguage/parser/ASTMyAttribute.java | 38 + .../rulelanguage/parser/ASTMyCondition.java | 69 + .../rulelanguage/parser/ASTMyCopy.java | 47 + .../rulelanguage/parser/ASTMyEmpty.java | 32 + .../rulelanguage/parser/ASTMyImport.java | 32 + .../rulelanguage/parser/ASTMyNs.java | 42 + .../rulelanguage/parser/ASTMyOp.java | 249 +++ .../rulelanguage/parser/ASTMyPreprocess.java | 56 + .../rulelanguage/parser/ASTMyScript.java | 48 + .../rulelanguage/parser/ASTMySet.java | 68 + .../rulelanguage/parser/ASTMySkip.java | 29 + .../rulelanguage/parser/ASTStart.java | 21 + .../rulelanguage/parser/AbstractNode.java | 42 + .../rulelanguage/parser/FtScript.java | 1333 +++++++++++ .../parser/FtScriptConstants.java | 217 ++ 
.../parser/FtScriptTokenManager.java | 1992 +++++++++++++++++ .../parser/FtScriptTreeConstants.java | 39 + .../rulelanguage/parser/FtScriptVisitor.java | 21 + .../rulelanguage/parser/JJTFtScriptState.java | 123 + .../rulelanguage/parser/Node.java | 39 + .../rulelanguage/parser/ParseException.java | 187 ++ .../rulelanguage/parser/SimpleCharStream.java | 471 ++++ .../rulelanguage/parser/SimpleNode.java | 96 + .../rulelanguage/parser/Token.java | 131 ++ .../rulelanguage/parser/TokenMgrError.java | 147 ++ .../transformation/rulelanguage/parser/ft.jj | 909 ++++++++ .../transformation/rulelanguage/parser/ft.jjt | 520 +++++ .../rulelanguage/util/Converter.java | 67 + .../rulelanguage/util/FunctionCall.java | 146 ++ .../rulelanguage/visitor/AbstractVisitor.java | 58 + .../visitor/RuleLanguageVisitor.java | 306 +++ .../utils/BlacklistConsumer.java | 30 + .../utils/NamespaceContextImpl.java | 48 + .../utils/TransformationRulesImportTool.java | 88 + .../service/DataTransformerFactory.java | 41 + .../service/SimpleDataTransformer.java | 96 + .../service/TransformationServiceImpl.java | 37 + .../src/main/java/prototype/Person.java | 165 ++ .../src/main/java/prototype/PersonOrig.java | 129 ++ .../main/java/prototype/utils/Capitalize.java | 13 + .../prototype/utils/DotAbbreviations.java | 11 + .../javax.xml.transform.TransformerFactory | 1 + ...ntext-dnet-data-transformation-service.xml | 34 + ...plicationContext-transformation-common.xml | 49 + ...plicationContext-transformation.properties | 8 + .../transformation/engine/identity.xsl | 8 + .../transformation/engine/oaftemplate.xsl | 15 + .../engine/syntaxcheckfailed.xsl | 14 + .../transformation/engine/template.xsl | 18 + .../transformation/schema/DMFSchema.xsd | 60 + .../schema/DMFSchema_vTransformator.xsd | 72 + .../transformation/schema/DMF_OAI.xsd | 39 + .../transformation/schema/DRIVER_DC.xsd | 154 ++ .../transformation/schema/DRIVER_DR.xsd | 151 ++ .../transformation/schema/DRIVER_DRI.xsd | 68 + 
.../DRIVER_OAI-ProvenanceInfoSchema.xsd | 44 + .../schema/OAFSchema_vTransformator.xsd | 90 + .../transformation/schema/OPENAIRE_OAF.xsd | 163 ++ .../enabling/views/inspector/transform.st | 36 + .../modular/ui/views/ui/transform.st | 44 + .../TransformationRuleDSResourceType.xsd | 108 + .../web/resources/js/transform/transform.js | 28 + ...ext-dnet-data-transformation-inspector.xml | 31 + .../VocabularyTypeEditorTest.java | 35 + .../engine/PreProcessorTest.java | 141 ++ .../SimpleTransformationEngineTest.java | 937 ++++++++ .../transformation/engine/StylesheetTest.java | 84 + .../engine/core/TransformationImplTest.java | 98 + .../engine/functions/ConvertTest.java | 98 + .../engine/functions/DateVocabularyTest.java | 73 + .../functions/RegularExpressionTest.java | 74 + .../engine/functions/RetrieveValueTest.java | 93 + .../engine/functions/SplitTest.java | 35 + .../rulelanguage/RuleLanguageTest.java | 558 +++++ .../rulelanguage/TransformationTest.java | 118 + .../TransformationRulesImportToolTest.java | 119 + .../src/test/resources/lang_vocabulary.xml | 1608 +++++++++++++ .../src/test/resources/mainScript_example.ftl | 14 + .../src/test/resources/sample_record_dmf.xml | 47 + .../src/test/resources/subScript_example.ftl | 10 + .../src/test/resources/trds_sample.xml | 38 + .../src/test/resources/trds_sample2.xml | 32 + .../src/test/resources/type_vocabulary.xml | 112 + pom.xml | 10 + 145 files changed, 19612 insertions(+) create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/common/profile/DnetResource.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/common/profile/IResource.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/common/profile/IResourceDao.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/common/profile/IResourceDaoSupport.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/common/profile/ProfileNotFoundException.java create mode 100644 
dnet-data-services/src/main/java/eu/dnetlib/common/profile/Resource.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/common/profile/ResourceCache.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/common/profile/ResourceDao.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/common/profile/ResourceDaoRemoteSupport.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/common/utils/EprUtils.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/common/utils/XMLException.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/common/utils/XMLSerializer.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/common/utils/XMLUtils.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/IDatabaseConnector.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/TransformationException.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/VocabularyMap.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/VocabularyRegistry.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/VocabularyTypeEditor.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/core/schema/Namespace.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/core/schema/SchemaAttribute.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/core/schema/SchemaElement.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/core/schema/SchemaInspector.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/core/schema/visitor/Visitor.java create mode 100644 
dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/core/schema/visitor/XSContentTypeVisitorImpl.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/core/schema/visitor/XSTermVisitorImpl.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/core/xsl/AbstractXslElement.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/core/xsl/XslConstructor.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/core/xsl/XslElement.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/core/xsl/XsltConstants.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/core/xsl/ext/TransformationFunctionProxy.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/FunctionResults.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/PreProcessor.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/SimpleTransformationEngine.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/core/ITransformation.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/core/StylesheetBuilder.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/core/TransformationImpl.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/AbstractTransformationFunction.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/Convert.java create mode 100644 
dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/DateVocabulary.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/Dblookup.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/Extract.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/IFeatureExtraction.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/ITransformationFunction.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/IVocabulary.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/IdentifierExtract.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/Lookup.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/LookupRecord.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/PersonVocabulary.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/ProcessingException.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/RegularExpression.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/RetrieveValue.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/Split.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/Vocabulary.java create mode 100644 
dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/Argument.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/Condition.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/IRule.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/RuleLanguageParser.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/Rules.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/RulesSet.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTMyAssign.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTMyAttribute.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTMyCondition.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTMyCopy.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTMyEmpty.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTMyImport.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTMyNs.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTMyOp.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTMyPreprocess.java create mode 100644 
dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTMyScript.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTMySet.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTMySkip.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTStart.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/AbstractNode.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/FtScript.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/FtScriptConstants.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/FtScriptTokenManager.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/FtScriptTreeConstants.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/FtScriptVisitor.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/JJTFtScriptState.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/Node.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ParseException.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/SimpleCharStream.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/SimpleNode.java create mode 100644 
dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/Token.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/TokenMgrError.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ft.jj create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ft.jjt create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/util/Converter.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/util/FunctionCall.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/visitor/AbstractVisitor.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/visitor/RuleLanguageVisitor.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/utils/BlacklistConsumer.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/utils/NamespaceContextImpl.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/utils/TransformationRulesImportTool.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/transformation/service/DataTransformerFactory.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/transformation/service/SimpleDataTransformer.java create mode 100644 dnet-data-services/src/main/java/eu/dnetlib/data/transformation/service/TransformationServiceImpl.java create mode 100644 dnet-data-services/src/main/java/prototype/Person.java create mode 100644 dnet-data-services/src/main/java/prototype/PersonOrig.java create mode 100644 dnet-data-services/src/main/java/prototype/utils/Capitalize.java create 
mode 100644 dnet-data-services/src/main/java/prototype/utils/DotAbbreviations.java create mode 100644 dnet-data-services/src/main/resources/META-INF/services/javax.xml.transform.TransformerFactory create mode 100644 dnet-data-services/src/main/resources/eu/dnetlib/applicationContext-dnet-data-transformation-service.xml create mode 100644 dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/applicationContext-transformation-common.xml create mode 100644 dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/applicationContext-transformation.properties create mode 100644 dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/engine/identity.xsl create mode 100644 dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/engine/oaftemplate.xsl create mode 100644 dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/engine/syntaxcheckfailed.xsl create mode 100644 dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/engine/template.xsl create mode 100644 dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/schema/DMFSchema.xsd create mode 100644 dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/schema/DMFSchema_vTransformator.xsd create mode 100644 dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/schema/DMF_OAI.xsd create mode 100644 dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/schema/DRIVER_DC.xsd create mode 100644 dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/schema/DRIVER_DR.xsd create mode 100644 dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/schema/DRIVER_DRI.xsd create mode 100644 dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/schema/DRIVER_OAI-ProvenanceInfoSchema.xsd create mode 100644 
dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/schema/OAFSchema_vTransformator.xsd create mode 100644 dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/schema/OPENAIRE_OAF.xsd create mode 100644 dnet-data-services/src/main/resources/eu/dnetlib/enabling/views/inspector/transform.st create mode 100644 dnet-data-services/src/main/resources/eu/dnetlib/functionality/modular/ui/views/ui/transform.st create mode 100644 dnet-data-services/src/main/resources/eu/dnetlib/test/schemas/TransformationRuleDSResourceType.xsd create mode 100644 dnet-data-services/src/main/resources/eu/dnetlib/web/resources/js/transform/transform.js create mode 100644 dnet-data-services/src/main/resources/eu/dnetlib/webContext-dnet-data-transformation-inspector.xml create mode 100644 dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/VocabularyTypeEditorTest.java create mode 100644 dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/engine/PreProcessorTest.java create mode 100644 dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/engine/SimpleTransformationEngineTest.java create mode 100644 dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/engine/StylesheetTest.java create mode 100644 dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/engine/core/TransformationImplTest.java create mode 100644 dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/engine/functions/ConvertTest.java create mode 100644 dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/engine/functions/DateVocabularyTest.java create mode 100644 dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/engine/functions/RegularExpressionTest.java create mode 100644 dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/engine/functions/RetrieveValueTest.java create mode 100644 
dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/engine/functions/SplitTest.java create mode 100644 dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/rulelanguage/RuleLanguageTest.java create mode 100644 dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/rulelanguage/TransformationTest.java create mode 100644 dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/utils/TransformationRulesImportToolTest.java create mode 100644 dnet-data-services/src/test/resources/lang_vocabulary.xml create mode 100644 dnet-data-services/src/test/resources/mainScript_example.ftl create mode 100644 dnet-data-services/src/test/resources/sample_record_dmf.xml create mode 100644 dnet-data-services/src/test/resources/subScript_example.ftl create mode 100644 dnet-data-services/src/test/resources/trds_sample.xml create mode 100644 dnet-data-services/src/test/resources/trds_sample2.xml create mode 100644 dnet-data-services/src/test/resources/type_vocabulary.xml diff --git a/dnet-data-services/pom.xml b/dnet-data-services/pom.xml index 04d8a71..2d13584 100644 --- a/dnet-data-services/pom.xml +++ b/dnet-data-services/pom.xml @@ -23,14 +23,27 @@ ${project.version} + + commons-beanutils + commons-beanutils + + org.json json + + org.svenson + svenson-json + com.ximpleware vtd-xml + + com.sun.xsom + xsom + com.jcraft jsch diff --git a/dnet-data-services/src/main/java/eu/dnetlib/common/profile/DnetResource.java b/dnet-data-services/src/main/java/eu/dnetlib/common/profile/DnetResource.java new file mode 100644 index 0000000..ffcd8fc --- /dev/null +++ b/dnet-data-services/src/main/java/eu/dnetlib/common/profile/DnetResource.java @@ -0,0 +1,58 @@ +package eu.dnetlib.common.profile; + +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.dom4j.Document; +import org.dom4j.Node; + +import eu.dnetlib.common.utils.XMLException; +import 
eu.dnetlib.common.utils.XMLUtils; + +public abstract class DnetResource { + + private static final Log log = LogFactory.getLog(DnetResource.class); + + Document resource; + + public DnetResource() { + } + + public DnetResource(Document resource){ + this.resource = resource; + } + + public void setResource(Document resource){ + this.resource = resource; + } + + public Document getResource(){ + return this.resource; + } + + public String getValue(String xpathExpr){ + String value = null; + try { + value = XMLUtils.evaluate(resource, xpathExpr); + } catch (XMLException e) { + log.error(e); + } + return value; + } + + public void setValue(String xpathExpr, String value){ + XMLUtils.getNode(resource, xpathExpr).setText(value); + } + + public List getNodeList(String xpathExpr){ + List nodeList = null; + try { + nodeList = XMLUtils.getNodes(resource, xpathExpr); + } catch (XMLException e) { + log.error(e); + } + return nodeList; + } + +} diff --git a/dnet-data-services/src/main/java/eu/dnetlib/common/profile/IResource.java b/dnet-data-services/src/main/java/eu/dnetlib/common/profile/IResource.java new file mode 100644 index 0000000..b825682 --- /dev/null +++ b/dnet-data-services/src/main/java/eu/dnetlib/common/profile/IResource.java @@ -0,0 +1,11 @@ +package eu.dnetlib.common.profile; + +import java.util.List; + + +public interface IResource { + + public String getValue(String xpathExpr); + @SuppressWarnings("unchecked") + public List getNodeList(String xpathExpr); +} diff --git a/dnet-data-services/src/main/java/eu/dnetlib/common/profile/IResourceDao.java b/dnet-data-services/src/main/java/eu/dnetlib/common/profile/IResourceDao.java new file mode 100644 index 0000000..407a1b0 --- /dev/null +++ b/dnet-data-services/src/main/java/eu/dnetlib/common/profile/IResourceDao.java @@ -0,0 +1,17 @@ +package eu.dnetlib.common.profile; + +import java.util.List; + +public interface IResourceDao { +// TODO documentation + public List getResources(String xquery); + + public Resource 
getResource(String id) throws Exception; + + public Resource getResourceByQuery(String query) throws Exception; + + public void removeResource(String id, Resource resource); + + public void updateResource(String id, Resource resource); + +} diff --git a/dnet-data-services/src/main/java/eu/dnetlib/common/profile/IResourceDaoSupport.java b/dnet-data-services/src/main/java/eu/dnetlib/common/profile/IResourceDaoSupport.java new file mode 100644 index 0000000..18ed8fe --- /dev/null +++ b/dnet-data-services/src/main/java/eu/dnetlib/common/profile/IResourceDaoSupport.java @@ -0,0 +1,12 @@ +package eu.dnetlib.common.profile; + +import java.util.List; + +public interface IResourceDaoSupport { +// TODO documentation + public List getResources(String xquery); + public Resource getResourceByXquery(String xquery) throws Exception; + public Resource getResource(String id) throws Exception; + public void updateResource(String id, Resource resource); + public void removeResource(String id, Resource resource); +} diff --git a/dnet-data-services/src/main/java/eu/dnetlib/common/profile/ProfileNotFoundException.java b/dnet-data-services/src/main/java/eu/dnetlib/common/profile/ProfileNotFoundException.java new file mode 100644 index 0000000..4cf76da --- /dev/null +++ b/dnet-data-services/src/main/java/eu/dnetlib/common/profile/ProfileNotFoundException.java @@ -0,0 +1,30 @@ +/** + * + */ +package eu.dnetlib.common.profile; + +/** + * @author jochen + * + */ +public class ProfileNotFoundException extends Exception { + + /** + * + */ + private static final long serialVersionUID = -6272083305345284826L; + + public ProfileNotFoundException(Throwable e) { + super(e); + } + + public ProfileNotFoundException(String msg, Throwable e) { + super(msg, e); + } + + public ProfileNotFoundException(String msg) { + super(msg); + } + + +} diff --git a/dnet-data-services/src/main/java/eu/dnetlib/common/profile/Resource.java b/dnet-data-services/src/main/java/eu/dnetlib/common/profile/Resource.java new 
file mode 100644 index 0000000..1cdd71c --- /dev/null +++ b/dnet-data-services/src/main/java/eu/dnetlib/common/profile/Resource.java @@ -0,0 +1,31 @@ +package eu.dnetlib.common.profile; + +import java.io.InputStream; + +import org.dom4j.Document; +import org.dom4j.DocumentException; +import org.dom4j.DocumentHelper; +import org.dom4j.io.SAXReader; + +/** + * @author jochen + * + */ +public class Resource extends DnetResource implements IResource{ + + + public Resource(){super();} + + public Resource(Document resource){ + super(resource); + } + + public Resource(String resourceProfile) throws DocumentException{ + super(DocumentHelper.parseText(resourceProfile)); + } + + public Resource(InputStream resourceProfileStream) throws DocumentException{ + super( (new SAXReader()).read(resourceProfileStream)); + } + +} diff --git a/dnet-data-services/src/main/java/eu/dnetlib/common/profile/ResourceCache.java b/dnet-data-services/src/main/java/eu/dnetlib/common/profile/ResourceCache.java new file mode 100644 index 0000000..fe21665 --- /dev/null +++ b/dnet-data-services/src/main/java/eu/dnetlib/common/profile/ResourceCache.java @@ -0,0 +1,90 @@ +/** + * + */ +package eu.dnetlib.common.profile; + +import java.util.List; +import java.util.concurrent.TimeUnit; + +import com.google.common.cache.CacheBuilder; +import com.google.common.cache.CacheLoader; +import com.google.common.cache.LoadingCache; + +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpDocumentNotFoundException; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; +// import eu.dnetlib.enabling.locators.DefaultUniqueServiceLocator; +import eu.dnetlib.enabling.tools.ServiceLocator; + +/** + * @author jochen + * + */ +public class ResourceCache implements IResourceDaoSupport{ + + private final LoadingCache cache; + @javax.annotation.Resource(name="lookupLocator") + private ServiceLocator lookupLocator; + + public ResourceCache() { + cache = 
CacheBuilder.newBuilder().expireAfterWrite(24, TimeUnit.HOURS).build(new CacheLoader(){ + + @Override + public Resource load(String aKey) throws Exception { + Resource resource = null; + if (aKey.startsWith("collection")){ + return new Resource(byQuery(aKey)); + }else{ + return new Resource(byId(aKey)); + } + } + + private String byQuery(String aQuery) throws ISLookUpDocumentNotFoundException, ISLookUpException{ + return lookupLocator.getService().getResourceProfileByQuery(aQuery); + } + + private String byId(String aId) throws ISLookUpDocumentNotFoundException, ISLookUpException{ + return lookupLocator.getService().getResourceProfile(aId); + } + + }); + } + + @Override + public List getResources(String xquery) { + // TODO Auto-generated method stub + return null; + } + + @Override + public Resource getResourceByXquery(String xquery)throws Exception { + return cache.get(xquery); + } + + @Override + public Resource getResource(String id)throws Exception { + return cache.get(id); + } + + @Override + public void updateResource(String id, Resource resource) { + // TODO Auto-generated method stub + + } + + @Override + public void removeResource(String id, Resource resource) { + // TODO Auto-generated method stub + + } + + public void setLookupLocator(ServiceLocator lookupLocator) { + this.lookupLocator = lookupLocator; + } + + public ServiceLocator getLookupLocator() { + return lookupLocator; + } + + +} diff --git a/dnet-data-services/src/main/java/eu/dnetlib/common/profile/ResourceDao.java b/dnet-data-services/src/main/java/eu/dnetlib/common/profile/ResourceDao.java new file mode 100644 index 0000000..3b4b65f --- /dev/null +++ b/dnet-data-services/src/main/java/eu/dnetlib/common/profile/ResourceDao.java @@ -0,0 +1,42 @@ +package eu.dnetlib.common.profile; + +import java.util.List; + + +public class ResourceDao implements IResourceDao { + + private IResourceDaoSupport daoSupport; + + public List getResources(String xquery) { + return daoSupport.getResources(xquery); + } 
+ + @Override + public Resource getResourceByQuery(String query)throws Exception { + // currently only Xquery is supported + return daoSupport.getResourceByXquery(query); + } + + public Resource getResource(String id)throws Exception { + return daoSupport.getResource(id); + } + + public void removeResource(String id, Resource resource) { + daoSupport.removeResource(id, resource); + } + + public void updateResource(String id, Resource resource) { + daoSupport.updateResource(id, resource); + } + + public void setDaoSupport(IResourceDaoSupport daoSupport) { + this.daoSupport = daoSupport; + } + + public IResourceDaoSupport getDaoSupport() { + return daoSupport; + } + + + +} diff --git a/dnet-data-services/src/main/java/eu/dnetlib/common/profile/ResourceDaoRemoteSupport.java b/dnet-data-services/src/main/java/eu/dnetlib/common/profile/ResourceDaoRemoteSupport.java new file mode 100644 index 0000000..d1907ed --- /dev/null +++ b/dnet-data-services/src/main/java/eu/dnetlib/common/profile/ResourceDaoRemoteSupport.java @@ -0,0 +1,119 @@ +package eu.dnetlib.common.profile; + +import java.util.LinkedList; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.dom4j.DocumentException; + +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpDocumentNotFoundException; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; +import eu.dnetlib.enabling.is.registry.rmi.ISRegistryException; +import eu.dnetlib.enabling.is.registry.rmi.ISRegistryService; +import eu.dnetlib.enabling.tools.ServiceLocator; + +/** + * + * @author jochen + * @deprecated this class is deprecated, use ResourceCache instead. 
+ * + */ +@Deprecated +public class ResourceDaoRemoteSupport implements IResourceDaoSupport{ + private static final Log log = LogFactory.getLog(ResourceDaoRemoteSupport.class); + + @javax.annotation.Resource(name="lookupLocator") + private ServiceLocator lookupLocator; + @javax.annotation.Resource(name="registryLocator") + private ServiceLocator registryLocator; + + public List getResources(String xquery){ + List list = new LinkedList(); + try { + List profileList = lookupLocator.getService().quickSearchProfile(xquery); + if (profileList != null){ + for (String profile: profileList){ + Resource resource = new Resource(profile); + list.add(resource); + } + } + } catch (ISLookUpException e) { + log.error(e); + } catch (DocumentException e) { + log.error(e); + } + return list; + } + + @Override + public Resource getResourceByXquery(String xquery) { + Resource resource = null; + String profile; + try{ + profile = lookupLocator.getService().getResourceProfileByQuery(xquery); + resource = new Resource(profile); + } catch (ISLookUpDocumentNotFoundException e) { + log.error(e); + } catch (ISLookUpException e) { + log.error(e); + } catch (DocumentException e) { + log.error(e); + } + return resource; + } + + public Resource getResource(String id) { + Resource resource = null; + String profile; + try { + profile = lookupLocator.getService().getResourceProfile(id); + resource = new Resource(profile); + } catch (ISLookUpDocumentNotFoundException e) { + log.error(e); + } catch (ISLookUpException e) { + log.error(e); + } catch (DocumentException e) { + log.error(e); + } + return resource; + } + + + @Override + public void removeResource(String id, Resource resource) { + // TODO Auto-generated method stub + + } + + @Override + public void updateResource(String id, Resource resource) { + try { + registryLocator.getService().updateProfile(resource.getValue("//RESOURCE_IDENTIFIER/@value"), resource.getResource().asXML(), resource.getValue("//RESOURCE_TYPE/@value")); + } catch 
(ISRegistryException e) { + log.error(e); + throw new IllegalStateException("cannot update profile.", e); + } + } + + public void setRegistryLocator(ServiceLocator registryLocator) { + this.registryLocator = registryLocator; + } + + public ServiceLocator getRegistryLocator() { + return registryLocator; + } + + public void setLookupLocator(ServiceLocator lookupLocator) { + this.lookupLocator = lookupLocator; + } + + public ServiceLocator getLookupLocator() { + return lookupLocator; + } + + + + +} diff --git a/dnet-data-services/src/main/java/eu/dnetlib/common/utils/EprUtils.java b/dnet-data-services/src/main/java/eu/dnetlib/common/utils/EprUtils.java new file mode 100644 index 0000000..ff52a7b --- /dev/null +++ b/dnet-data-services/src/main/java/eu/dnetlib/common/utils/EprUtils.java @@ -0,0 +1,245 @@ +/** + * Copyright 2008-2009 DRIVER PROJECT (ICM UW) + * Original author: Marek Horst + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package eu.dnetlib.common.utils; + +import java.io.StringReader; +import java.util.Map; + +import javax.xml.namespace.QName; +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.ws.wsaddressing.W3CEndpointReference; +import javax.xml.ws.wsaddressing.W3CEndpointReferenceBuilder; + +import org.apache.log4j.Logger; +import org.dom4j.io.DocumentResult; +import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; +import org.xml.sax.InputSource; + +/** + * Information Service utils class. + * @author Marek Horst + * @version 0.7.6 + * + */ +public class EprUtils { + + protected static final Logger log = Logger.getLogger(EprUtils.class); + + public static final String INDEX_RESULT_SET_NAME = "ICMResultSet"; + public static final String SERVICE_NAME = "IndexService"; + + private DocumentResult infoset = new DocumentResult(); + + + /** + * @param epr - W3CEndpoint reference + * @param nsMap - mapping of namespace-prefix,uri pairs + */ + public EprUtils(W3CEndpointReference epr, Map nsMap){ + epr.writeTo(infoset); + XMLUtils.setNamespaces(nsMap); + } + + /** + * @param xpathExpr + * @return the value obtained by the xpath evaluation + * @throws XMLException + */ + public String getValue(String xpathExpr) throws XMLException{ + return XMLUtils.evaluate(infoset.getDocument(), xpathExpr); + } + + + /** + * Parses ResultSetEPR to the String[] where: + * String[0] - ResultSetService location, + * String[1] - ResultSetId + * @param resultSetEPR + * @return string array where: String[0] - ResultSetService location, String[1] - ResultSetId + */ + @Deprecated + public static String[] parseResultSetEPR(String resultSetEPR) { + if (resultSetEPR==null || resultSetEPR.length()==0) + return null; + DocumentBuilderFactory factory = + DocumentBuilderFactory.newInstance(); + factory.setIgnoringComments(true); + 
factory.setValidating(false); + DocumentBuilder db; + try { + db = factory.newDocumentBuilder(); + Document doc = db.parse(new InputSource(new StringReader(resultSetEPR))); + Element documentElement = doc.getDocumentElement(); + NodeList nodeList = documentElement.getElementsByTagName("ResourceIdentifier:ResourceIdentifier"); + if (nodeList.getLength()!=1) { + log.error("Invalid notifications of nodes for driver:ResourceIdentifier element. Expected 1, found: "+nodeList.getLength()); + return null; + } + if (nodeList.item(0)==null) { + log.error("Couldn't find ResourceIdentifier:ResourceIdentifier element!"); + return null; + } + NodeList nodeListWSA = documentElement.getElementsByTagName("Address"); + if (nodeListWSA.getLength()!=1) { + nodeListWSA = documentElement.getElementsByTagName("Address"); + if (nodeListWSA.getLength()!=1) { + log.error("Invalid notifications of nodes for Address element. Expected 1, found: "+nodeListWSA.getLength()); + return null; + } + } + if (nodeListWSA.item(0)==null) { + log.error("Couldn't find Address element!"); + return null; + } + return new String[] { + getStringFromNode(nodeListWSA.item(0)), + getStringFromNode(nodeList.item(0)) + }; + + } catch (Exception e) { + log.error("Exception occured when extracting ResultSet id from ResultSet service xml-type response!",e); + return null; + } + } + + /** + * Extracts ResultSet identifier from ResultSet xml-type response. + * @param sourceResultSetId + * @return ResultSet identifier. 
+ */ + public static String extractResultSetId(String sourceResultSetId) { + if (sourceResultSetId==null || sourceResultSetId.length()==0) + return null; + DocumentBuilderFactory factory = + DocumentBuilderFactory.newInstance(); + factory.setIgnoringComments(true); + factory.setValidating(false); + DocumentBuilder db; + try { + db = factory.newDocumentBuilder(); + Document doc = db.parse(new InputSource(new StringReader(sourceResultSetId))); + Element documentElement = doc.getDocumentElement(); + NodeList nodeList = documentElement.getElementsByTagName("ResourceIdentifier:ResourceIdentifier"); + if (nodeList.getLength()!=1) { + log.error("Invalid notifications of nodes for ResourceIdentifier:ResourceIdentifier element. Expected 1, found: "+nodeList.getLength()); + return null; + } + + if (nodeList.item(0)==null) { + log.error("Couldn't find ResourceIdentifier:ResourceIdentifier element!"); + return null; + } + return getStringFromNode(nodeList.item(0)); + + } catch (Exception e) { + log.error("Exception occured when extracting ResultSet id from ResultSet service xml-type response!",e); + return null; + } + } + + + private static String getStringFromNode(Node node) { + +// This code may not work on some jdk +// Element resourceIdentifier = (Element) node; +// return resourceIdentifier.getTextContent(); + + /* + try { + DOMSource domSource = new DOMSource(node); + StringWriter writer = new StringWriter(); + StreamResult result = new StreamResult(writer); + TransformerFactory tf = TransformerFactory.newInstance(); + Transformer transformer = tf.newTransformer(); + transformer.transform(domSource, result); + return writer.toString(); + } catch (TransformerException e) { + log.error("Exception occured when transforming node value!", e); + return null; + } + */ + + return node.getFirstChild().getNodeValue(); + } + + /** + * Builds ResultSet end point reference for given serviceAddress and resultSetId. 
+ * @param serviceAddress + * @param resultSetId + * @param wsdlLocation + * @return resultSet EPR + */ + public static String buildResultSetEPR(String serviceAddress, String resultSetId, + String wsdlLocation) { + StringBuffer strBuff = new StringBuffer(); + strBuff.append(""); + strBuff.append(""); + strBuff.append("
"); + strBuff.append(serviceAddress); + strBuff.append("
"); + strBuff.append(""); + strBuff.append(""); + strBuff.append(resultSetId); + strBuff.append(""); + strBuff.append(""); + strBuff.append(""); + strBuff.append(""); + strBuff.append(INDEX_RESULT_SET_NAME); + strBuff.append(""); + strBuff.append(""); + strBuff.append("
/**
 * Checked exception for failures while reading or evaluating XML.
 */
public class XMLException extends Exception {

	// private: the serialization machinery finds the field regardless of its visibility
	private static final long serialVersionUID = 2413331108861490367L;

	/**
	 * @param errorMessage description of the failure
	 */
	public XMLException(String errorMessage){
		super(errorMessage);
	}

	/**
	 * @param exc the underlying failure, kept as the cause
	 */
	public XMLException(Exception exc){
		super(exc);
	}

	/**
	 * @param errorMessage description of the failure
	 * @param e the underlying failure, kept as the cause
	 */
	public XMLException(String errorMessage, Throwable e){
		super(errorMessage, e);
	}

}
b/dnet-data-services/src/main/java/eu/dnetlib/common/utils/XMLSerializer.java new file mode 100644 index 0000000..0a2c6a1 --- /dev/null +++ b/dnet-data-services/src/main/java/eu/dnetlib/common/utils/XMLSerializer.java @@ -0,0 +1,45 @@ +package eu.dnetlib.common.utils; + +import java.io.StringWriter; + +import javax.xml.bind.JAXBContext; +import javax.xml.bind.JAXBElement; +import javax.xml.bind.JAXBException; +import javax.xml.bind.Marshaller; +import javax.xml.bind.annotation.XmlRootElement; +import javax.xml.namespace.QName; + + +public class XMLSerializer { + + private Marshaller marshaller; + private Class clazz; + + public XMLSerializer(Class clazz){ + this.clazz = clazz; + try { + init(); + } catch (JAXBException e) { + throw new IllegalArgumentException(e); + } + } + + + protected void init() throws JAXBException{ + Class[] all = {this.clazz}; + JAXBContext context = JAXBContext.newInstance(all); + marshaller = context.createMarshaller(); + marshaller.setProperty("com.sun.xml.bind.xmlDeclaration", false); + } + + public String getAsXml(T record) throws JAXBException{ + final StringWriter buffer = new StringWriter(); + marshaller.marshal(createElement(record), buffer); + return buffer.toString(); + } + + protected JAXBElement createElement(final T value) { + final XmlRootElement annotation = this.clazz.getAnnotation(XmlRootElement.class); + return new JAXBElement(new QName(annotation.namespace(), annotation.name()), this.clazz, null, value); + } +} diff --git a/dnet-data-services/src/main/java/eu/dnetlib/common/utils/XMLUtils.java b/dnet-data-services/src/main/java/eu/dnetlib/common/utils/XMLUtils.java new file mode 100644 index 0000000..f9e0ec4 --- /dev/null +++ b/dnet-data-services/src/main/java/eu/dnetlib/common/utils/XMLUtils.java @@ -0,0 +1,54 @@ +package eu.dnetlib.common.utils; + +import java.io.StringReader; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.dom4j.Document; +import org.dom4j.DocumentException; 
+import org.dom4j.DocumentHelper; +import org.dom4j.Node; +import org.dom4j.XPath; +import org.dom4j.io.SAXReader; + +public class XMLUtils { + + private static final SAXReader reader = new SAXReader(); + private static Map nsMap = new HashMap(); + + public static Document getDocument(String document)throws XMLException{ + try{ + return reader.read(new StringReader(document)); + }catch(DocumentException e){ + throw new XMLException(e); + } + } + + public static void setNamespaces(Map nsMap){ + XMLUtils.nsMap = nsMap; + } + + public static String evaluate(String document, String expression)throws XMLException{ + XPath xpath = DocumentHelper.createXPath(expression); + xpath.setNamespaceURIs(XMLUtils.nsMap); + return xpath.valueOf(getDocument(document)); + } + + public static String evaluate(Node document, String expression)throws XMLException{ + XPath xpath = DocumentHelper.createXPath(expression); + xpath.setNamespaceURIs(XMLUtils.nsMap); + return xpath.valueOf(document); + } + + @SuppressWarnings("unchecked") + public static List getNodes(Node document, String expression)throws XMLException{ + XPath xpath = DocumentHelper.createXPath(expression); + xpath.setNamespaceURIs(XMLUtils.nsMap); + return xpath.selectNodes(document, xpath); + } + + public static Node getNode(Node document, String expression){ + return document.selectSingleNode(expression); + } +} diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/IDatabaseConnector.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/IDatabaseConnector.java new file mode 100644 index 0000000..9c569ba --- /dev/null +++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/IDatabaseConnector.java @@ -0,0 +1,20 @@ +/** + * + */ +package eu.dnetlib.data.collective.transformation; + +import java.util.List; + +/** + * @author jochen + * + */ +public interface IDatabaseConnector { + + /** + * executes a SQL query + * @param aSQLquery + * 
/**
 * Checked exception raised by the transformation components.
 *
 * @author jochen
 */
public class TransformationException extends Exception {

	private static final long serialVersionUID = 1L;

	/**
	 * Creates an exception without message or cause.
	 */
	public TransformationException() {
		super();
	}

	/**
	 * @param message description of the failure
	 */
	public TransformationException(String message) {
		super(message);
	}

	/**
	 * @param cause the underlying failure
	 */
	public TransformationException(Throwable cause) {
		super(cause);
	}

	/**
	 * @param message description of the failure
	 * @param cause the underlying failure
	 */
	public TransformationException(String message, Throwable cause) {
		super(message, cause);
	}

}
contains the key argument. Method implemented for backward compatibility. + * @param aKey vocabulary name as a key + * @return true if key exist else false + */ + public boolean containsKey(String aKey){ + return map.containsKey(aKey); + } + + /** + * @return the map + */ + @JSONProperty(ignoreIfNull = true) + public Map getMap() { + return map; + } + + /** + * @param map the map to set + */ + @JSONTypeHint(Vocabulary.class) + public void setMap(Map map) { + this.map = map; + } +} diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/VocabularyRegistry.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/VocabularyRegistry.java new file mode 100644 index 0000000..2f98453 --- /dev/null +++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/VocabularyRegistry.java @@ -0,0 +1,96 @@ +package eu.dnetlib.data.collective.transformation; + +import javax.annotation.Resource; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.dom4j.DocumentException; + +import eu.dnetlib.data.collective.transformation.engine.functions.DateVocabulary; +import eu.dnetlib.data.collective.transformation.engine.functions.IVocabulary; +import eu.dnetlib.data.collective.transformation.engine.functions.PersonVocabulary; +// import eu.dnetlib.data.collective.transformation.engine.functions.PmcVocabulary; +import eu.dnetlib.data.collective.transformation.engine.functions.Vocabulary; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpDocumentNotFoundException; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; +import eu.dnetlib.enabling.locators.DefaultUniqueServiceLocator; + +/** + * @author jochen + * + */ +public class VocabularyRegistry { + + private static final Log log = LogFactory.getLog(VocabularyRegistry.class); + private static final String dateVocabularyName = "DateISO8601"; + // private static final String pmcVocabularyName = "PMC"; + private 
static final String personVocabularyName = "Person"; + + @Resource + private DefaultUniqueServiceLocator uniqueServiceLocator; + private VocabularyMap vocabularies; + private boolean isInitialized = false; + + public void init() { + String vocabularyQueryPrefix = "collection('/db/DRIVER/VocabularyDSResources/VocabularyDSResourceType')//RESOURCE_PROFILE"; + String targetVocabulary = ""; + for (String key : vocabularies.getMap().keySet()) { + try { + Vocabulary v = vocabularies.getMap().get(key); + targetVocabulary = vocabularies.getMap().get(key).getName(); + v.setResource(new eu.dnetlib.common.profile.Resource(uniqueServiceLocator.getIsLookupService().getResourceProfileByQuery( + vocabularyQueryPrefix + "[.//VOCABULARY_NAME='" + targetVocabulary + "' or .//VOCABULARY_NAME/@code='" + targetVocabulary + "'] "))); + } catch (ISLookUpDocumentNotFoundException e) { + throw new IllegalStateException("vocabulary profile not found for name or code " + targetVocabulary, e); + } catch (ISLookUpException e) { + log.fatal("ISLookupException in VocabularyRegistry, key = " + key + " : ", e); + throw new IllegalStateException(e); + } catch (DocumentException e) { + log.fatal("DocumentException in VocabularyRegistry, key = " + key + " : ", e); + throw new IllegalStateException(e); + } + } + vocabularies.getMap().put(dateVocabularyName, new DateVocabulary()); + vocabularies.getMap().put(personVocabularyName, new PersonVocabulary()); + // PmcVocabulary pmcVocab = new PmcVocabulary(); + // pmcVocab.setMappingFile(mappingFile); + // vocabularies.getMap().put(pmcVocabularyName, pmcVocab); + isInitialized = true; + log.info("VocabularyRegistry is initialized."); + } + + public IVocabulary getVocabulary(final String aVocabularyName) { + if (!isInitialized) { + init(); + } + return vocabularies.getMap().get(aVocabularyName); + } + + public VocabularyMap getVocabularies() { + if (!isInitialized) { + init(); + } + return vocabularies; + } + + public void setVocabularies(final VocabularyMap 
vocabularies) { + this.vocabularies = vocabularies; + } + + public void addVocabulary(final String aVocabularyName, final Vocabulary aVocabulary) { + this.vocabularies.getMap().put(aVocabularyName, aVocabulary); + } + + public void removeVocabulary(final String aVocabulary) { + this.vocabularies.getMap().remove(aVocabulary); + } + + public DefaultUniqueServiceLocator getUniqueServiceLocator() { + return uniqueServiceLocator; + } + + public void setUniqueServiceLocator(final DefaultUniqueServiceLocator uniqueServiceLocator) { + this.uniqueServiceLocator = uniqueServiceLocator; + } + +} diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/VocabularyTypeEditor.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/VocabularyTypeEditor.java new file mode 100644 index 0000000..d6982e8 --- /dev/null +++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/VocabularyTypeEditor.java @@ -0,0 +1,25 @@ +/** + * + */ +package eu.dnetlib.data.collective.transformation; + +import java.beans.PropertyEditorSupport; + +import org.svenson.JSONParser; + +/** + * @author js + * + */ +public class VocabularyTypeEditor extends PropertyEditorSupport { + + /** + * Sets the property value by parsing the given JsonString. May raise java.lang.IllegalArgumentException if either the String is badly formatted or if this kind of property can't be expressed as text. 
/**
 * Mutable pair of an XML namespace prefix and the URI it is bound to.
 *
 * @author jochen
 */
public class Namespace {

	String prefix;
	String uri;

	/**
	 * @param aPrefix the namespace prefix
	 * @param aUri the namespace URI
	 */
	public Namespace(String aPrefix, String aUri) {
		prefix = aPrefix;
		uri = aUri;
	}

	/**
	 * @return the namespace URI
	 */
	public String getUri() {
		return this.uri;
	}

	/**
	 * @param uri the namespace URI to store
	 */
	public void setUri(String uri) {
		this.uri = uri;
	}

	/**
	 * @return the namespace prefix
	 */
	public String getPrefix() {
		return this.prefix;
	}

	/**
	 * @param prefix the namespace prefix to store
	 */
	public void setPrefix(String prefix) {
		this.prefix = prefix;
	}
}
public String getName() { + return name; + } + + /** + * @param name the name to set + */ + public void setName(String name) { + this.name = name; + } + + /** + * @return the required + */ + public boolean isRequired() { + return required; + } + + /** + * @param required the required to set + */ + public void setRequired(boolean required) { + this.required = required; + } +} diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/core/schema/SchemaElement.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/core/schema/SchemaElement.java new file mode 100644 index 0000000..75bce2d --- /dev/null +++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/core/schema/SchemaElement.java @@ -0,0 +1,160 @@ +package eu.dnetlib.data.collective.transformation.core.schema; + +import java.util.LinkedList; +import java.util.List; + +/** + * @author jochen + * + */ +public class SchemaElement { + + private String targetNamespace; + private String name; + private boolean isRepeatable; + private boolean isRoot; + private boolean containsSimpleType; + private int minOccurs; + private int maxOccurs; + private List childList = new LinkedList(); + private List attributeList = new LinkedList(); + private Namespace namespace; + + /** + * @return the targetNamespace + */ + public String getTargetNamespace() { + return targetNamespace; + } + /** + * @param targetNamespace the targetNamespace to set + */ + public void setTargetNamespace(String targetNamespace) { + this.targetNamespace = targetNamespace; + } + /** + * @return the isRepeatable + */ + public boolean isRepeatable() { + return isRepeatable; + } + /** + * @param isRepeatable the isRepeatable to set + */ + public void setRepeatable(boolean isRepeatable) { + this.isRepeatable = isRepeatable; + } + /** + * @return the isMandatory + */ + public boolean isMandatory() { + if (minOccurs > 0) return true; + return false; + } + + /** + * @return the 
minOccurs + */ + public int getMinOccurs() { + return minOccurs; + } + /** + * @param minOccurs the minOccurs to set + */ + public void setMinOccurs(int minOccurs) { + this.minOccurs = minOccurs; + } + /** + * @return the maxOccurs + */ + public int getMaxOccurs() { + return maxOccurs; + } + /** + * @param maxOccurs the maxOccurs to set + */ + public void setMaxOccurs(int maxOccurs) { + this.maxOccurs = maxOccurs; + } + /** + * @return the childList + */ + public List getChildList() { + return childList; + } + /** + * @param childList the childList to set + */ + public void setChildList(List childList) { + this.childList = childList; + } + + /** + * @param name the name of the element to set + */ + public void setName(String name) { + this.name = name; + } + + /** + * @return the name of this element + */ + public String getName() { + return name; + } + + /** + * sets true if this element contains a simpleType, false else + * @param containsSimpleType + */ + public void setContainsSimpleType(boolean containsSimpleType) { + this.containsSimpleType = containsSimpleType; + } + + /** + * @return the containsSimpleType + */ + public boolean containsSimpleType() { + return containsSimpleType; + } + + /** + * @param isRoot the isRoot to set + */ + public void setRoot(boolean isRoot) { + this.isRoot = isRoot; + } + + /** + * @return the isRoot + */ + public boolean isRoot() { + return isRoot; + } + /** + * @param namespace the namespace to set + */ + public void setNamespace(Namespace namespace) { + this.namespace = namespace; + } + /** + * @return the namespace + */ + public Namespace getNamespace() { + return namespace; + } + /** + * @return the attributeList + */ + public List getAttributeList() { + return attributeList; + } + /** + * @param attributeList the attributeList to set + */ + public void addAttribute(SchemaAttribute aAttribute) { + this.attributeList.add(aAttribute); + } + + +} diff --git 
a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/core/schema/SchemaInspector.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/core/schema/SchemaInspector.java new file mode 100644 index 0000000..a585007 --- /dev/null +++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/core/schema/SchemaInspector.java @@ -0,0 +1,98 @@ +package eu.dnetlib.data.collective.transformation.core.schema; + +import java.io.File; +import java.io.IOException; +import java.net.URL; +import java.util.List; + +import org.xml.sax.SAXException; + +import com.sun.xml.xsom.XSContentType; +import com.sun.xml.xsom.XSElementDecl; +import com.sun.xml.xsom.XSParticle; +import com.sun.xml.xsom.XSSchemaSet; +import com.sun.xml.xsom.XSTerm; +import com.sun.xml.xsom.parser.XSOMParser; + +import eu.dnetlib.data.collective.transformation.core.schema.visitor.Visitor; + +/** + * Parses an XML schema with XSOM and collects the schema elements that a {@link Visitor} records while walking the content model of a given root element. + * + * @author jochen + * + */ +public class SchemaInspector { + + private List elementList = new java.util.LinkedList(); + private boolean inspected = false; + private String rootElement; + /** + * Parses the given schema file and inspects the content of the named root element. + * @param aSchema the XML schema file to parse + * @param aRootElement name of the root element to look up in the parsed schema + * @throws SAXException + * @throws IOException + * @throws IllegalStateException if no declaration of the root element is found in the schema + */ + public void inspect(File aSchema, String aRootElement) throws SAXException, IOException{ + XSOMParser parser = new XSOMParser(); + parser.parse(aSchema); + doInspect(parser, aRootElement); + } + /** + * Parses the schema located at the given URL and inspects the content of the named root element. + * @param aSchema location of the XML schema to parse + * @param aRootElement name of the root element to look up in the parsed schema + * @throws SAXException + * @throws IllegalStateException if no declaration of the root element is found in the schema + */ + public void inspect(URL aSchema, String aRootElement)throws SAXException{ + XSOMParser parser = new XSOMParser(); + parser.parse(aSchema); + doInspect(parser, aRootElement); + } + + /** + * inspects the schema and creates a new list of schema elements.
+ * Remembers the root element name, visits the term of the root element's complex content (if any) and marks this inspector as inspected. + * @param parser the parser that has already been given the schema to parse + * @param aRootElement name of the root element; it is looked up with an empty-string namespace argument + * @throws SAXException + * @throws IllegalStateException if no element declaration is found for aRootElement + */ + private void doInspect(XSOMParser parser, String aRootElement) throws SAXException{ + this.rootElement = aRootElement; +// for (SchemaDocument doc: parser.getDocuments()){ +// Map attgrdecls = doc.getSchema().getAttributeDecls(); +// for (String k: attgrdecls.keySet()){ +// System.out.println("keyxs: " + k); +// } +// } + Visitor visitor = new Visitor(); + XSSchemaSet sset = parser.getResult(); +// Iterator it = sset.iterateAttributeDecls(); +// while(it.hasNext()){ +// System.out.println(it.next().getName()); +// } + XSElementDecl elemDecl = sset.getElementDecl("", aRootElement); + if (elemDecl == null){ + throw new IllegalStateException("rootElement " + aRootElement + " not found in schema."); + } + // assuming the root element is of complex type + if (elemDecl.getType().isComplexType()){ + XSContentType contentType = elemDecl.getType().asComplexType().getContentType(); + XSParticle particle = contentType.asParticle(); + if (particle != null){ + XSTerm term = particle.getTerm(); + term.visit(visitor); + } + } + this.elementList = visitor.getElements(); + this.inspected = true; + } + + /** + * @return true once an inspection has run to completion, false before that + */ + public boolean isInspected() { + return inspected; + } + + /** + * @return the root element name passed to the most recent inspection, or null if none has been started + */ + public String getRootElement() { + return rootElement; + } + + /** + * @return the schema elements recorded by the visitor during the most recent inspection; an empty list before any inspection + */ + public List getChildElements(){ + return elementList; + } +} diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/core/schema/visitor/Visitor.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/core/schema/visitor/Visitor.java new file mode 100644 index 0000000..cb35e38 --- /dev/null +++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/core/schema/visitor/Visitor.java @@ -0,0 +1,168 @@ +package eu.dnetlib.data.collective.transformation.core.schema.visitor; + + +import
java.util.LinkedList; +import java.util.List; + +import org.apache.commons.lang3.NotImplementedException; +//import org.apache.commons.logging.Log; +//import org.apache.commons.logging.LogFactory; + +import com.sun.xml.xsom.XSAnnotation; +import com.sun.xml.xsom.XSAttGroupDecl; +import com.sun.xml.xsom.XSAttributeDecl; +import com.sun.xml.xsom.XSAttributeUse; +import com.sun.xml.xsom.XSComplexType; +import com.sun.xml.xsom.XSContentType; +import com.sun.xml.xsom.XSElementDecl; +import com.sun.xml.xsom.XSFacet; +import com.sun.xml.xsom.XSIdentityConstraint; +import com.sun.xml.xsom.XSModelGroup; +import com.sun.xml.xsom.XSModelGroupDecl; +import com.sun.xml.xsom.XSNotation; +import com.sun.xml.xsom.XSParticle; +import com.sun.xml.xsom.XSSchema; +import com.sun.xml.xsom.XSSimpleType; +import com.sun.xml.xsom.XSType; +import com.sun.xml.xsom.XSWildcard; +import com.sun.xml.xsom.XSXPath; +import com.sun.xml.xsom.visitor.XSVisitor; + +import eu.dnetlib.data.collective.transformation.core.schema.SchemaAttribute; +import eu.dnetlib.data.collective.transformation.core.schema.SchemaElement; + +/** + * {@link XSVisitor} that records one {@link SchemaElement} per visited particle. + * Only complexType, elementDecl, modelGroup and particle are implemented; every other callback throws NotImplementedException. + * + * @author jochen + * + */ +public class Visitor implements XSVisitor { + + //private static Log log = LogFactory.getLog(Visitor.class); + List schemaElements = new LinkedList(); + SchemaElement currentElement; + SchemaAttribute currentAttribute; + + @Override + public void annotation(XSAnnotation arg0) { + throw new NotImplementedException("TODO: annotation"); + } + + @Override + public void attGroupDecl(XSAttGroupDecl arg0) { + throw new NotImplementedException("TODO attGroupDecl"); + } + /** + * Not implemented. + * NOTE(review): currentAttribute is not assigned anywhere in this class, so the setName call presumably fails with a NullPointerException before the NotImplementedException is reached - confirm. + */ + @Override + public void attributeDecl(XSAttributeDecl aAttributeDecl) { + currentAttribute.setName(aAttributeDecl.getName()); + //log.debug("visit attribute name: " + aAttributeDecl.getName()); + //log.debug("visit attribute type: " + aAttributeDecl.getType()); + throw new NotImplementedException("TODO attributeDecl"); + } + + @Override + public void attributeUse(XSAttributeUse
aAttributeUse) { + throw new NotImplementedException("TODO attributeUse"); + } + /** + * Visits the content type of a complex type derived by restriction, using an XSContentTypeVisitorImpl bound to this visitor. + * Any other derivation method results in a NotImplementedException. + */ + @Override + public void complexType(XSComplexType aType) { + if (aType.getDerivationMethod()== XSType.RESTRICTION){ + XSContentTypeVisitorImpl contentTypeVisitor = new XSContentTypeVisitorImpl(); + contentTypeVisitor.setVisitor(this); + aType.getContentType().visit(contentTypeVisitor); + }else{ + // aType.getExplicitContent().visit(this); + throw new NotImplementedException("visiting types other then 'RESTRICTION are not implemented'"); + } + } + + @Override + public void facet(XSFacet arg0) { + throw new NotImplementedException("TODO facet"); + } + + @Override + public void identityConstraint(XSIdentityConstraint arg0) { + throw new NotImplementedException("TODO identityConstraint"); + } + + @Override + public void notation(XSNotation arg0) { + throw new NotImplementedException("TODO notation"); + } + + @Override + public void schema(XSSchema arg0) { + throw new NotImplementedException("TODO schema"); + } + + @Override + public void xpath(XSXPath arg0) { + throw new NotImplementedException("TODO xpath"); + } + /** + * If the type of the element is local, copies the element's name and target namespace onto the current element and visits the type with this visitor. + * Elements whose type is not local are left untouched. + */ + @Override + public void elementDecl(XSElementDecl aElementDecl) { + XSType type = aElementDecl.getType(); + if (type.isLocal()){ + // complete the information about the current element + // log.debug("visitor element name: " + aElementDecl.getName()); + currentElement.setName(aElementDecl.getName()); + currentElement.setTargetNamespace(aElementDecl.getTargetNamespace()); + type.visit(this); + } + } + + @Override + public void modelGroup(XSModelGroup aGroup) { + // a group of elements as children of the root element + for (XSParticle p: aGroup.getChildren()){ + particle(p); + } + } + + @Override + public void modelGroupDecl(XSModelGroupDecl arg0) { + throw new NotImplementedException("TODO modelGroupDecl"); + } + + @Override + public void wildcard(XSWildcard arg0) { + throw new NotImplementedException("TODO wildcard"); + } + + @Override + public void empty(XSContentType arg0) { + throw
new NotImplementedException("TODO empty"); + } + /** + * Records a new schema element for the particle (min/max occurrence and repeatable flag), makes it the current element and then visits the particle's term through an XSTermVisitorImpl bound to this visitor. + */ + @Override + public void particle(XSParticle aParticle) { + // create a new schema element, add to the list of schema elements, set this element as current element + SchemaElement element = new SchemaElement(); + element.setMinOccurs(aParticle.getMinOccurs().intValue()); + element.setMaxOccurs(aParticle.getMaxOccurs().intValue()); + element.setRepeatable(aParticle.isRepeated()); + schemaElements.add(element); + currentElement = element; + XSTermVisitorImpl termVisitor = new XSTermVisitorImpl(); + termVisitor.setVisitor(this); + aParticle.getTerm().visit(termVisitor); + } + + + @Override + public void simpleType(XSSimpleType arg0) { + throw new NotImplementedException("TODO simpleType"); + } + /** + * @return the internal list of schema elements recorded so far (not a copy) + */ + public List getElements(){ + return this.schemaElements; + } + /** + * @return the element most recently made current by a particle call in this class + */ + protected SchemaElement getCurrentElement(){ + return currentElement; + } + +} diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/core/schema/visitor/XSContentTypeVisitorImpl.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/core/schema/visitor/XSContentTypeVisitorImpl.java new file mode 100644 index 0000000..5b8e223 --- /dev/null +++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/core/schema/visitor/XSContentTypeVisitorImpl.java @@ -0,0 +1,82 @@ +package eu.dnetlib.data.collective.transformation.core.schema.visitor; + +import java.util.Collection; +import java.util.Iterator; + +import org.apache.commons.lang3.NotImplementedException; + +import com.sun.xml.xsom.XSAttributeUse; +import com.sun.xml.xsom.XSContentType; +import com.sun.xml.xsom.XSElementDecl; +import com.sun.xml.xsom.XSParticle; +import com.sun.xml.xsom.XSSimpleType; +import com.sun.xml.xsom.XSType; +import com.sun.xml.xsom.visitor.XSContentTypeVisitor; + +import eu.dnetlib.data.collective.transformation.core.schema.SchemaAttribute; +import
eu.dnetlib.data.collective.transformation.core.schema.SchemaElement; + +/** + * {@link XSContentTypeVisitor} that turns element particles into child {@link SchemaElement}s of the wrapped {@link Visitor}'s current element. + * Only particle is implemented; empty and simpleType throw NotImplementedException. + * + * @author jochen + * + */ +public class XSContentTypeVisitorImpl implements XSContentTypeVisitor { + + private Visitor visitor; + + @Override + public void empty(XSContentType arg0) { + throw new NotImplementedException("TODO empty"); + } + /** + * Visits the particle's term with an XSTermVisitorImpl bound to the wrapped visitor. + * If the term is an element declaration, additionally builds a SchemaElement from it (name, target namespace, occurrence bounds, repeatable flag, the declared attribute uses of a complex type, and the containsSimpleType flag when the complex type's content type is a simple type) and appends it to the child list of the wrapped visitor's current element. + */ + @Override + public void particle(XSParticle aParticle) { + XSTermVisitorImpl termVisitor = new XSTermVisitorImpl(); + termVisitor.setVisitor(this.visitor); + aParticle.getTerm().visit(termVisitor); + if (aParticle.getTerm().isElementDecl()){ + XSElementDecl elem = aParticle.getTerm().asElementDecl(); + SchemaElement element = new SchemaElement(); + + XSType type = elem.getType(); + if (type.isComplexType()){ + Collection attrColls = + type.asComplexType().getDeclaredAttributeUses(); + Iterator attrIter = attrColls.iterator(); + while (attrIter.hasNext()){ + XSAttributeUse attrUse = attrIter.next(); + SchemaAttribute attribute = new SchemaAttribute(); + attribute.setName(attrUse.getDecl().getName()); + attribute.setRequired(attrUse.isRequired()); + element.addAttribute(attribute); + } + } + element.setName(elem.getName()); + element.setTargetNamespace(elem.getTargetNamespace()); + element.setMinOccurs(aParticle.getMinOccurs().intValue()); + element.setMaxOccurs(aParticle.getMaxOccurs().intValue()); + element.setRepeatable(aParticle.isRepeated()); + + if (elem.getType().isComplexType()){ + if (elem.getType().asComplexType().getContentType().asSimpleType() != null){ + element.setContainsSimpleType(true); + } + } + this.visitor.getCurrentElement().getChildList().add(element); + } + } + + @Override + public void simpleType(XSSimpleType arg0) { + throw new NotImplementedException("TODO simpleType"); + } + /** + * @param visitor the Visitor whose current element receives the child elements built by particle + */ + public void setVisitor(Visitor visitor) { + this.visitor = visitor; + } + + public Visitor getVisitor() { + return visitor; + } + +} \ No newline at end of file diff --git
a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/core/schema/visitor/XSTermVisitorImpl.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/core/schema/visitor/XSTermVisitorImpl.java new file mode 100644 index 0000000..fa5636b --- /dev/null +++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/core/schema/visitor/XSTermVisitorImpl.java @@ -0,0 +1,56 @@ +package eu.dnetlib.data.collective.transformation.core.schema.visitor; + +import org.apache.commons.lang3.NotImplementedException; + +import com.sun.xml.xsom.XSElementDecl; +import com.sun.xml.xsom.XSModelGroup; +import com.sun.xml.xsom.XSModelGroupDecl; +import com.sun.xml.xsom.XSParticle; +import com.sun.xml.xsom.XSWildcard; +import com.sun.xml.xsom.visitor.XSTermVisitor; + +/** + * {@link XSTermVisitor} that forwards local element declarations to the wrapped {@link Visitor} and hands the particles of a model group to an {@link XSContentTypeVisitorImpl}. + * modelGroupDecl and wildcard are not implemented and throw NotImplementedException. + * + * @author jochen + * + */ +public class XSTermVisitorImpl implements XSTermVisitor { + + private Visitor visitor; + /** + * Passes the declaration on to the wrapped visitor when it is local; non-local declarations are ignored. + * The wrapped visitor is dereferenced here, so it has to be set before a term is visited. + */ + @Override + public void elementDecl(XSElementDecl aElementDecl) { + if (aElementDecl.isLocal()){ + this.visitor.elementDecl(aElementDecl); + }else{ + // ignore non local element declarations + } + } + /** + * Creates an XSContentTypeVisitorImpl bound to the wrapped visitor and calls its particle method for every child of the model group. + */ + @Override + public void modelGroup(XSModelGroup aModelGroup) { + XSContentTypeVisitorImpl contentTypeVisitor = new XSContentTypeVisitorImpl(); + contentTypeVisitor.setVisitor(this.visitor); + for (XSParticle p: aModelGroup.getChildren()){ + contentTypeVisitor.particle(p); + } + } + + @Override + public void modelGroupDecl(XSModelGroupDecl arg0) { + throw new NotImplementedException("TODO modelGroupDecl"); + } + + @Override + public void wildcard(XSWildcard arg0) { + throw new NotImplementedException("TODO wildcard"); + } + /** + * @param visitor the Visitor that receives local element declarations and is passed on to the content type visitor + */ + public void setVisitor(Visitor visitor) { + this.visitor = visitor; + } + + public Visitor getVisitor() { + return visitor; + } + +} diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/core/xsl/AbstractXslElement.java
b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/core/xsl/AbstractXslElement.java new file mode 100644 index 0000000..9ca2eb3 --- /dev/null +++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/core/xsl/AbstractXslElement.java @@ -0,0 +1,48 @@ +package eu.dnetlib.data.collective.transformation.core.xsl; + +import java.util.LinkedList; +import java.util.List; + +/** + * @author jochen + * + */ +public abstract class AbstractXslElement { + + private String functionName; + protected List attrList = new LinkedList(); + protected StringBuilder enclosedElements = new StringBuilder(); + protected List nsList = new LinkedList(); + + + public AbstractXslElement(String aFunctioName) { + this.functionName = aFunctioName; + } + + public String asXml(boolean isEmpty){ + StringBuilder builder = new StringBuilder(); + builder.append("<"); + builder.append(functionName + " "); + for (String ns: nsList){ + builder.append(ns + " "); + } + + for (String attr: attrList){ + builder.append(attr); + } + if (isEmpty){ + builder.append("/>"); + }else{ + builder.append(">"); + builder.append(enclosedElements.toString()); + builder.append(""); + } + return builder.toString(); + } + + public String asXml() { + return asXml(false); + } + +} diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/core/xsl/XslConstructor.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/core/xsl/XslConstructor.java new file mode 100644 index 0000000..27bd8e0 --- /dev/null +++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/core/xsl/XslConstructor.java @@ -0,0 +1,406 @@ +package eu.dnetlib.data.collective.transformation.core.xsl; + + +import java.util.Map; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import eu.dnetlib.data.collective.transformation.rulelanguage.Rules; + +/** + * @author jochen + * + */ +public class 
XslConstructor { + private static final Log log = LogFactory.getLog(XslConstructor.class); + + public String writeOutVariableRule(Rules rule){ + XslElement xsltVariable = new XslElement(XsltConstants.param); + xsltVariable.addAttribute("name", rule.getUniqueName().substring(1)); + if (rule.getXpath().length() > 0){ + xsltVariable.addAttribute("select", rule.getXpath()); + }else if (rule.getFunctionCall() != null){ + // TODO - set the value if rule is static + if (rule.getFunctionCall().doPreprocess()){ + xsltVariable.addAttribute("select", rule.getFunctionCall().getXSLpreparatedFunctionCall()); + }else{ + xsltVariable.addAttribute("select", rule.getFunctionCall().getXSLdirectFunctionCall(null)); + } + }else{ + if (rule.getConstant().length() > 0){ + xsltVariable.addAttribute("select", rule.getConstant()); + } + } + return xsltVariable.asXml(); + } + + /** + * @param rule + * @return + */ + public String writeOutConditionalChooseComplex(Rules rule){ + XslElement choose = new XslElement(XsltConstants.choose); + XslElement when = new XslElement(XsltConstants.when); + log.debug("XslConstructor: conditionExpresssion: " + rule.getCondition().getConditionExpression()); + when.addAttribute("test", rule.getCondition().getConditionExpression()); + when.addEnclosedElements(this.writeOutRuleComplex(rule.getCondition().getPrimaryRule(), rule.getCondition().getPrimaryRule().getUniqueName())); + choose.addEnclosedElements(when.asXml()); + XslElement otherwise = new XslElement(XsltConstants.otherwise); + otherwise.addEnclosedElements(this.writeOutRuleComplex(rule.getCondition().getSecondaryRule(), rule.getCondition().getSecondaryRule().getUniqueName())); + choose.addEnclosedElements(otherwise.asXml()); + return choose.asXml(); + } + + public String writeOutConditionalIfComplex(Rules rule){ + XslElement ifCondition = new XslElement(XsltConstants.ifCondition); + if (rule.getCondition().isPrimary(rule)){ + ifCondition.addAttribute("test", 
rule.getCondition().getConditionExpression()); + ifCondition.addEnclosedElements(this.writeOutRuleComplex(rule.getCondition().getPrimaryRule(), rule.getCondition().getPrimaryRule().getUniqueName())); +// ifCondition.addEnclosedElements(this.writeOutRule(rule, rule.getTargetField())); + }else{ + ifCondition.addAttribute("test", "not(" + rule.getCondition().getConditionExpression() + ")"); + ifCondition.addEnclosedElements(this.writeOutRuleComplex(rule.getCondition().getPrimaryRule(), rule.getCondition().getPrimaryRule().getUniqueName())); +// ifCondition.addEnclosedElements(this.writeOutRule(rule, rule.getTargetField())); + } + return ifCondition.asXml(); + + } + /** + * @param rule + * @param targetFieldName + * @return + */ + public String writeOutRuleComplex(Rules rule, String targetFieldName){ + // TODO a lot + XslElement targetField = new XslElement(targetFieldName); + if (rule.hasSet()){ + for (Rules attrRule: rule.getRulesSet().getPendingRules()){ + if (attrRule.getConstant().length() > 0){ + targetField.addAttribute(attrRule.getAttribute(), attrRule.getConstant()); // TODO: check for value type (constant, function, xpath etc.) 
+ }else if (attrRule.getAssignmentVariable().length() > 0){ + XslElement attr = new XslElement(XsltConstants.attribute); + attr.addAttribute("name", attrRule.getAttribute()); + + XslElement valueOf = new XslElement(XsltConstants.valueOf, "select", attrRule.getAssignmentVariable()); + attr.addEnclosedElements(valueOf.asXml()); + targetField.addEnclosedElements(attr.asXml()); + }else if (attrRule.getXpath().length() > 0){ + XslElement attr = new XslElement(XsltConstants.attribute); + attr.addAttribute("name", attrRule.getAttribute()); + XslElement valueOf = new XslElement(XsltConstants.valueOf, "select", attrRule.getXpath()); + attr.addEnclosedElements(valueOf.asXml()); + targetField.addEnclosedElements(attr.asXml()); + }else{ + XslElement valueOf; + log.debug("name of the rule in writeOutRuleComplex: " + rule.getUniqueName() + " " + attrRule.getAttribute()); + if (attrRule.getFunctionCall().doPreprocess()){ + valueOf = new XslElement(XsltConstants.valueOf, "select", attrRule.getFunctionCall().getXSLpreparatedFunctionCall()); + }else{ + valueOf = new XslElement(XsltConstants.valueOf, "select", attrRule.getFunctionCall().getXSLdirectFunctionCall(null)); + } + XslElement attr = new XslElement(XsltConstants.attribute); + attr.addAttribute("name", attrRule.getAttribute()); + attr.addEnclosedElements(valueOf.asXml()); + targetField.addEnclosedElements(attr.asXml()); + } + } + log.debug(targetField.asXml()); + + } + log.debug("unique name of rule: " + rule.getUniqueName()); + + if (rule.getXpath().length() > 0){ + // TODO for-each +// throw new IllegalStateException("complex rule with Constant Assignment not yet implemented"); + XslElement valueOf = new XslElement(XsltConstants.valueOf, "select", "."); +// // valueOf.setBoundPrefix(currentRule.getNamespace()); // needed here? 
+ XslElement forEach = new XslElement(XsltConstants.forEach); + forEach.addAttribute("select", rule.getXpath()); +// +// XslElement targetField = new XslElement(targetFieldName); + targetField.addEnclosedElements(valueOf.asXml()); + forEach.addEnclosedElements(targetField.asXml()); +// + return forEach.asXml(); + + }else if (rule.getAssignmentVariable().length() > 0){ + log.debug("assignmentVar: " + rule.getAssignmentVariable()); + XslElement valueOf = new XslElement(XsltConstants.valueOf, "select", rule.getAssignmentVariable()); + targetField.addEnclosedElements(valueOf.asXml()); + log.debug(targetField.asXml()); + }else if (rule.getConstant().length() > 0){ + throw new IllegalStateException("complex rule with Constant Assignment not yet implemented"); + } + + return targetField.asXml(); + +// if (rule.getConstant().length() > 0){ +// if (rule.getAttribute().length() > 0){ +// XslElement targetField = new XslElement(targetFieldName, rule.getAttribute(), rule.getConstant()); +// System.out.println("XslConstructor: " + rule.getUniqueName()); +// System.out.println("XslConstructor: " + rule.hasSet()); +// System.out.println("XslConstructor: defines attribute: " + rule.definesAttribute()); +// System.out.println("XslConstructor: attribute: " + rule.getAttribute()); +// +// Iterator rulesIterator = rule.getRulesSet().getPendingRules().iterator(); +// while (rulesIterator.hasNext()){ +// Rules pendingRule = rulesIterator.next(); +// targetField.addAttribute(pendingRule.getAttribute(), pendingRule.getConstant()); +// } +// return targetField.asXml(); +// } +// } +// return ""; + } + + public String writeOutRuleCopy(Rules rule, String targetFieldName){ + XslElement targetField = new XslElement(targetFieldName); + XslElement copy = new XslElement(XsltConstants.copyOf, "select", rule.getProperties().getProperty("copySelectExpression")); + targetField.addEnclosedElements(copy.asXml(true)); + return targetField.asXml(); + } + + public String writeOutRule(Rules rule, String 
targetFieldName){ + if (rule.getXpath().length() > 0){ + // TODO consider the namespace + // TODO consider the number of occurrences of the source element - limited or unlimited, currently unlimited + XslElement valueOf = new XslElement(XsltConstants.valueOf, "select", "."); + // valueOf.setBoundPrefix(currentRule.getNamespace()); // needed here? + XslElement forEach = new XslElement(XsltConstants.forEach); + forEach.addAttribute("select", rule.getXpath()); + + XslElement targetField = new XslElement(targetFieldName); + targetField.addEnclosedElements(valueOf.asXml()); + forEach.addEnclosedElements(targetField.asXml()); + + return forEach.asXml(); + + }else if (rule.getConstant().length() > 0){ + + // TODO case-distinction needed + // (1) it's a constant + // (2) it's an external function call + // (2.1) the external function call's argument may contain an xpath-expression, which has to be determined + + // do not create 'for-each', just process the rule-function + XslElement xslText = new XslElement(XsltConstants.text); + xslText.addEnclosedElements(rule.getConstant()); + + XslElement targetField = new XslElement(targetFieldName); + targetField.addEnclosedElements(xslText.asXml()); + return targetField.asXml(); + }else if (rule.getAssignmentVariable().length() > 0){ + // hardcoded workaround + XslElement targetField = new XslElement(targetFieldName); +// if (rule.getAssignmentVariable().equals("varId")){ +// XslElement copyOf = new XslElement(XsltConstants.copyOf, "select", rule.getAssignmentVariable()); +// targetField.addEnclosedElements(copyOf.asXml()); +// }else{ + XslElement valueOf = new XslElement(XsltConstants.valueOf, "select", rule.getAssignmentVariable()); + targetField.addEnclosedElements(valueOf.asXml()); +// } + return targetField.asXml(); + }else if (rule.isEmpty()){ + XslElement targetField = new XslElement(targetFieldName); + return targetField.asXml(); + }else if (rule.isSkip()){ + // TODO + XslElement callTemplate = new 
XslElement(XsltConstants.callTemplate, "name", "terminate"); + //XslElement msgField = new XslElement(XsltConstants.message, "terminate", "yes"); + //XslElement xslText = new XslElement(XsltConstants.text); + //xslText.addEnclosedElements("some default exception message"); + //msgField.addEnclosedElements(xslText.asXml()); + return callTemplate.asXml(); + }else{ + XslElement valueOf; + if (rule.getFunctionCall().doPreprocess()){ + valueOf = new XslElement(XsltConstants.valueOf, "select", rule.getFunctionCall().getXSLpreparatedFunctionCall()); + }else{ + valueOf = new XslElement(XsltConstants.valueOf, "select", rule.getFunctionCall().getXSLdirectFunctionCall(null)); + } + //valueOf.setBoundPrefix(ns_dnetExt); + + XslElement targetField = new XslElement(targetFieldName); + targetField.addEnclosedElements(valueOf.asXml()); + + return targetField.asXml(); + } + } + + public String writeOutConditionalChoose(Rules rule){ + XslElement choose = new XslElement(XsltConstants.choose); + XslElement when = new XslElement(XsltConstants.when); + when.addAttribute("test", rule.getCondition().getConditionExpression()); + when.addEnclosedElements(this.writeOutRule(rule.getCondition().getPrimaryRule(), rule.getCondition().getPrimaryRule().getUniqueName())); + choose.addEnclosedElements(when.asXml()); + XslElement otherwise = new XslElement(XsltConstants.otherwise); + otherwise.addEnclosedElements(this.writeOutRule(rule.getCondition().getSecondaryRule(), rule.getCondition().getSecondaryRule().getUniqueName())); + choose.addEnclosedElements(otherwise.asXml()); + return choose.asXml(); + } + + public String writeOutConditionalIf(Rules rule){ + XslElement ifCondition = new XslElement(XsltConstants.ifCondition); + if (rule.getCondition().isPrimary(rule)){ + ifCondition.addAttribute("test", rule.getCondition().getConditionExpression()); + ifCondition.addEnclosedElements(this.writeOutRule(rule, rule.getTargetField())); + }else{ + ifCondition.addAttribute("test", "not(" + 
rule.getCondition().getConditionExpression() + ")"); + ifCondition.addEnclosedElements(this.writeOutRule(rule, rule.getTargetField())); + } + return ifCondition.asXml(); + } + + public String writeOutApplyTemplates(String selectValue){ + XslElement applyTemplates = new XslElement(XsltConstants.applyTemplates, "select", selectValue); + return applyTemplates.asXml(true); + } + + public String writeOutCallTemplate(String aTemplateName){ + XslElement callTemplate = new XslElement(XsltConstants.callTemplate, "name", aTemplateName); + return callTemplate.asXml(); + } + + public String writeOutApplyConditionalTemplateChoose(Rules rule){ + XslElement forEach = new XslElement(XsltConstants.forEach); + forEach.addAttribute("select", rule.getCondition().getApplyExpression()); + XslElement choose = new XslElement(XsltConstants.choose); + XslElement when = new XslElement(XsltConstants.when); + when.addAttribute("test", rule.getCondition().getConditionExpression()); + XslElement targetField = new XslElement(rule.getCondition().getPrimaryRule().getTargetField()); + // TODO check type of rule -> xpath, constant, function + XslElement valueOf = null; + Rules pRule = rule.getCondition().getPrimaryRule(); + if (pRule.getFunctionCall() != null){ + if (pRule.getFunctionCall().doPreprocess()){ + valueOf = new XslElement(XsltConstants.valueOf, "select", pRule.getFunctionCall().getXSLpreparatedFunctionCall()); + }else{ + valueOf = new XslElement(XsltConstants.valueOf, "select", pRule.getFunctionCall().getXSLdirectFunctionCall(null)); + } + }else{ + valueOf = new XslElement(XsltConstants.valueOf, "select", pRule.getXpath()); + } + targetField.addEnclosedElements(valueOf.asXml()); + when.addEnclosedElements(targetField.asXml()); + choose.addEnclosedElements(when.asXml()); + XslElement otherwise = new XslElement(XsltConstants.otherwise); + targetField = new XslElement(rule.getCondition().getSecondaryRule().getTargetField()); + // TODO check type of rule -> xpath, constant, function + Rules 
sRule = rule.getCondition().getSecondaryRule(); + if (sRule.getFunctionCall() != null){ + if (sRule.getFunctionCall().doPreprocess()){ + valueOf = new XslElement(XsltConstants.valueOf, "select", sRule.getFunctionCall().getXSLpreparatedFunctionCall()); + }else{ + valueOf = new XslElement(XsltConstants.valueOf, "select", sRule.getFunctionCall().getXSLdirectFunctionCall(null)); + } + }else{ + valueOf = new XslElement(XsltConstants.valueOf, "select", sRule.getXpath()); + } + targetField.addEnclosedElements(valueOf.asXml()); + otherwise.addEnclosedElements(targetField.asXml()); + choose.addEnclosedElements(otherwise.asXml()); + return forEach.asXml(); + } + + public String writeOutApplyConditionalTemplateIf(Rules rule, boolean isComplexStructure){ + // TODO check primary, alternative rule -> if, choose + XslElement forEach = new XslElement(XsltConstants.forEach); + forEach.addAttribute("select", rule.getCondition().getApplyExpression()); + + // store position in variable + XslElement posVar = new XslElement(XsltConstants.variable); + posVar.addAttribute("name", "posVar"); + posVar.addAttribute("select", "position()"); + forEach.addEnclosedElements(posVar.asXml()); + // if case + XslElement ifCondition = new XslElement(XsltConstants.ifCondition); + if (rule.getCondition().isPrimary(rule)){ + ifCondition.addAttribute("test", rule.getCondition().getConditionExpression()); + Rules pRule = rule.getCondition().getPrimaryRule(); + if (isComplexStructure){ + ifCondition.addEnclosedElements(writeOutRuleComplex(pRule, pRule.getUniqueName())); + }else{ + XslElement targetField = new XslElement(pRule.getTargetField()); + // TODO check type of rule -> xpath, constant, function + XslElement valueOf = null; + if (pRule.getFunctionCall() != null){ + if (pRule.getFunctionCall().doPreprocess()){ + valueOf = new XslElement(XsltConstants.valueOf, "select", pRule.getFunctionCall().getXSLpositionFunctionCall()); + }else{ + valueOf = new XslElement(XsltConstants.valueOf, "select", 
pRule.getFunctionCall().getXSLdirectFunctionCall(null)); + } + }else{ + valueOf = new XslElement(XsltConstants.valueOf, "select", pRule.getXpath()); + } + targetField.addEnclosedElements(valueOf.asXml()); + ifCondition.addEnclosedElements(targetField.asXml()); + } + forEach.addEnclosedElements(ifCondition.asXml()); + }else{ + ifCondition.addAttribute("test", "not(" + rule.getCondition().getConditionExpression() + ")"); + Rules sRule = rule.getCondition().getSecondaryRule(); + if (isComplexStructure){ + ifCondition.addEnclosedElements(writeOutRuleComplex(sRule, sRule.getUniqueName())); + }else{ + XslElement targetField = new XslElement(sRule.getTargetField()); + // TODO check type of rule -> xpath, constant, function + XslElement valueOf = null; + + if (sRule.getFunctionCall() != null){ + if (sRule.getFunctionCall().doPreprocess()){ + valueOf = new XslElement(XsltConstants.valueOf, "select", sRule.getFunctionCall().getXSLpositionFunctionCall()); + }else{ + valueOf = new XslElement(XsltConstants.valueOf, "select", sRule.getFunctionCall().getXSLdirectFunctionCall(null)); + } + }else{ + valueOf = new XslElement(XsltConstants.valueOf, "select", sRule.getXpath()); + } + targetField.addEnclosedElements(valueOf.asXml()); + ifCondition.addEnclosedElements(targetField.asXml()); + } + forEach.addEnclosedElements(ifCondition.asXml()); + } + return forEach.asXml(); + } + + public XslElement writeOutRecursiveTemplate(Rules rule, String templateName, Map nsDeclarations) { + XslElement subTemplate = new XslElement(XsltConstants.template, "name", templateName); + XslElement param = new XslElement(XsltConstants.param); + param.addAttribute("name", templateName + "param"); + param.addAttribute("select", rule.getFunctionCall().getXSLdirectFunctionCall(templateName)); // TODO functionCall + subTemplate.addEnclosedElements(param.asXml()); + + XslElement forEach = new XslElement(XsltConstants.forEach); + forEach.addAttribute("select", "$" + templateName + "param"); + XslElement element = 
new XslElement(XsltConstants.element); + // split ns element name into element name and namespace + String targetElementName = rule.getFunctionCall().getParameters().get("elementName"); + String namespace = ""; + if (targetElementName.contains(":")){ + String[] nameSplitting = targetElementName.split(":"); + targetElementName = nameSplitting[1]; + namespace = nsDeclarations.get(nameSplitting[0]); + } + element.addAttribute("name", targetElementName); + element.addAttribute("namespace", namespace); + element.addAttribute("inherit-namespaces", "yes"); + + XslElement valueOf = new XslElement(XsltConstants.valueOf, "select", "."); + + element.addEnclosedElements(valueOf.asXml()); + forEach.addEnclosedElements(element.asXml()); + subTemplate.addEnclosedElements(forEach.asXml()); + return subTemplate; + } + + public String writeOutApplyAbout() { + //XslElement about = new XslElement("about"); + XslElement copy = new XslElement(XsltConstants.copyOf, "select", "@*|//*[local-name()='about']"); +// XslElement applyTemplatesSelect = new XslElement(XsltConstants.applyTemplates, "select", "@*|node()"); +// copy.addEnclosedElements(applyTemplatesSelect.asXml()); + //about.addEnclosedElements(copy.asXml()); + return copy.asXml(); + } + +} diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/core/xsl/XslElement.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/core/xsl/XslElement.java new file mode 100644 index 0000000..9a845e0 --- /dev/null +++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/core/xsl/XslElement.java @@ -0,0 +1,43 @@ +package eu.dnetlib.data.collective.transformation.core.xsl; + +import java.util.Collection; + + +/** + * @author jochen + * + */ +public class XslElement extends AbstractXslElement { + + public XslElement(String aFunctionName) { + super(aFunctionName); + } + + public XslElement(String aFunctionName, String aAttrName, String aAttrValue) { + 
/**
 * String constants used when generating stylesheets: the prefixed names of the
 * XSLT instruction elements, the {@code xmlns:xsl} namespace declaration, and
 * the extension function namespace name.
 * <p>
 * This is a pure constants holder, so it is final and cannot be instantiated.
 *
 * @author js
 */
public final class XsltConstants {

    /** Not instantiable: the class only exposes constants. */
    private XsltConstants() {
    }

    public static final String applyTemplates = "xsl:apply-templates";
    public static final String attribute = "xsl:attribute";
    public static final String callTemplate = "xsl:call-template";
    public static final String choose = "xsl:choose";
    public static final String copy = "xsl:copy";
    public static final String copyOf = "xsl:copy-of";
    public static final String element = "xsl:element";
    public static final String forEach = "xsl:for-each";
    public static final String ifCondition = "xsl:if";
    public static final String message = "xsl:message";
    public static final String otherwise = "xsl:otherwise";
    public static final String param = "xsl:param";
    public static final String template = "xsl:template";
    public static final String text = "xsl:text";
    public static final String valueOf = "xsl:value-of";
    public static final String variable = "xsl:variable";
    public static final String when = "xsl:when";
    public static final String withParam = "xsl:with-param";

    /** Namespace declaration attribute binding the {@code xsl} prefix. */
    public static final String nsXsl = "xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\"";
    /** Name of the extension function namespace (presumably bound to the function proxy; verify against the stylesheet builder). */
    public static final String extFuncNS = "TransformationFunction";

}
eu.dnetlib.data.collective.transformation.engine.functions.IdentifierExtract; +import eu.dnetlib.data.collective.transformation.engine.functions.LookupRecord; +import eu.dnetlib.data.collective.transformation.engine.functions.ProcessingException; +import eu.dnetlib.data.collective.transformation.engine.functions.RegularExpression; +import eu.dnetlib.data.collective.transformation.engine.functions.Split; + +/** + * The class implements external XSLT functions + * @author jochen + * + */ +public class TransformationFunctionProxy { + + @SuppressWarnings("unused") + private static final Log log = LogFactory.getLog(TransformationFunctionProxy.class); + private static TransformationFunctionProxy tf; + private RegularExpression regExprFunction = new RegularExpression(); + private Convert convertFunction; + private IdentifierExtract identifierExtractFunction = new IdentifierExtract(); + private static DocumentBuilder docBuilder; + private static Transformer transformer; + private Split split = new Split(); + private Map mapOfResults = new HashMap(); + private LookupRecord lookupRecord; + private static XPath xpath = XPathFactory.newInstance().newXPath(); + + /** + * @return the transformationFunctionProxy instance + */ + public static TransformationFunctionProxy getInstance(){ + if ( tf == null ){ + tf = new TransformationFunctionProxy(); + try { + DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); + dbf.setNamespaceAware(true); + docBuilder = dbf.newDocumentBuilder(); + transformer = TransformerFactory.newInstance().newTransformer(); + xpath.setNamespaceContext(new NamespaceContext() { + + @Override + public Iterator getPrefixes(String namespaceURI) { + // TODO Auto-generated method stub + return null; + } + + @Override + public String getPrefix(String namespaceURI) { + // TODO Auto-generated method stub + return null; + } + + @Override + public String getNamespaceURI(String aPrefix) { + if (aPrefix == null){ + throw new IllegalArgumentException("No prefix 
provided!"); + }else if (aPrefix.equals(XMLConstants.DEFAULT_NS_PREFIX)){ + return "http://namespace.openaire.eu"; + }else if (aPrefix.equals("dc")){ + return "http://purl.org/dc/elements/1.1/"; + }else{ + return XMLConstants.NULL_NS_URI; + } + } + }); + + } catch (Exception e) { + log.fatal("error while instantiating DocumentBuilderFactory, Transfomer, Xpath.namespacecontext", e); + throw new IllegalStateException(e); + } + } + return tf; + } + + /** + * @param uniqueKey + * @param i + * @return + */ + public String getValue(String uniqueKey, int i){ + if ( !mapOfResults.containsKey(uniqueKey)){ + throw new IllegalStateException("unknown key: " + uniqueKey); + } + return mapOfResults.get(uniqueKey).get(i); + } + + /** + * @param uniqueKey + * @param i + * @return + * @deprecated + */ +// public String convert(String uniqueKey, int i){ +// if (mapOfResults == null){ +// return "transformationFunctionProxy_convert not initialized"; +// }else{ +// if (!mapOfResults.containsKey(uniqueKey)){ +// throw new IllegalStateException("unknown key: " + uniqueKey); +// } +// return mapOfResults.get(uniqueKey).get(i); +// } +// } + + /** + * @param uniqueKey + * @param i + * @param aPos + * @return + * @deprecated + */ +// public String convert(String uniqueKey, int i, int aPos){ +// if (mapOfResults == null){ +// return "transformationFunctionProxy_convert not initialized"; +// }else{ +// if (!mapOfResults.containsKey(uniqueKey)){ +// throw new IllegalStateException("unknown key: " + uniqueKey); +// } +// return mapOfResults.get(uniqueKey).get(i, aPos); +// } +// } + + /** + * @param uniqueKey + * @param i + * @return + */ + public String extract(String uniqueKey, int i){ + if (mapOfResults == null){ + return "transformationFunctionProxy_extract not initialized"; + }else{ + if (!mapOfResults.containsKey(uniqueKey)){ + throw new IllegalStateException("unknown key: " + uniqueKey); + } + return mapOfResults.get(uniqueKey).get(i); + } + } + + /** + * normalize values given as an 
input value by using a vocabulary + * @param aInput - the value as a String + * @param aVocabularyName - the name of the vocabulary, which must be known for the vocabulary registry + * @return + */ + public synchronized String convertString(String aInput, String aVocabularyName){ + List values = new LinkedList(); + values.add(aInput); + try { + log.debug("conversion input: " + aInput); + String conversionResult = convertFunction.executeSingleValue(aVocabularyName, values); + log.debug("conversion result: " + conversionResult); + return conversionResult; + } catch (ProcessingException e) { + log.fatal("convert failed for args 'input': " + aInput + " , 'vocabularyName': " + aVocabularyName, e); + throw new IllegalStateException(e); + } + } + + /** + * normalize values given as a NodeList by using a vocabulary + * @param aInput - the input values as NodeList + * @param aVocabularyName - the name of the vocabulary, which must be known for the vocabulary registry + * @return + */ + public synchronized String convert(NodeList aInput, String aVocabularyName){ + List values = new LinkedList(); + getTextFromNodeList(aInput, values); + try { + return convertFunction.executeSingleValue(aVocabularyName, values); + } catch (ProcessingException e) { + throw new IllegalStateException(e); + } + } + + public synchronized String convert(NodeList aInput, String aVocabularyName, String aDefaultPattern, String aFunction){ + List values = new LinkedList(); + getTextFromNodeList(aInput, values); + try { + List results = convertFunction.executeFilterByParams(aVocabularyName, values, aDefaultPattern, aFunction); + if (results.size() > 0) + return results.get(0); + else + return ""; + } catch (ProcessingException e) { + throw new IllegalStateException(e); + } + } + + private void getTextFromNodeList(NodeList aNodeList, List aTextvalues){ + for (int i = 0; i < aNodeList.getLength(); i++){ + Node n = aNodeList.item(i); + if (n.getNodeType() == Node.ELEMENT_NODE) + 
getTextFromNodeList(n.getChildNodes(), aTextvalues); + else if (n instanceof Text) + aTextvalues.add(n.getNodeValue()); + } + } + + /** + * substitutes using regular expression + * @param aInput + * @param aReplacement + * @param aRegularExpression + * @return + */ + public synchronized String regExpr(String aInput, String aReplacement, String aRegularExpression){ + try { + int lastSlash = aRegularExpression.lastIndexOf("/"); + String trailingOptions = aRegularExpression.substring(lastSlash); +// log.debug("trailingOptions: " + trailingOptions); + int replacementSlash = aRegularExpression.substring(0, lastSlash).lastIndexOf("/"); + String replacementFromExpression = aRegularExpression.substring(replacementSlash + 1, lastSlash); +// log.debug("replacementFromExpr lengt: " + replacementFromExpression.length() + ", value: " + replacementFromExpression); + String newRegExpr = aRegularExpression.substring(0, replacementSlash + 1) + aReplacement + replacementFromExpression + trailingOptions; +// log.debug("newRegExpr: " + newRegExpr); + return regExprFunction.executeSingleValue(newRegExpr, aInput, aReplacement); + } catch (ProcessingException e) { + throw new IllegalStateException(e); + } + } + + public String lookup(String aIdentifier, String aPropertyKey){ + log.debug("functionProxy.lookup: param identifier: " + aIdentifier + " , key: " + aPropertyKey); + return this.lookupRecord.getPropertyValue(aIdentifier, aPropertyKey); + } + + public synchronized Collection split(NodeList aInput, String aRegularExpression, String aCallId){ + try { + List textValues = new LinkedList(); + getTextFromNodeList(aInput, textValues); + return split.executeAllValues(textValues, aRegularExpression); + //return split.executeSingleValue(textValues, aRegularExpression, aCallId); + }catch (ProcessingException e){ + throw new IllegalStateException(e); + } + } + + public synchronized String split(String aCallId){ + try { + return split.executeSingleValue(aCallId); + }catch (ProcessingException 
e){ + throw new IllegalStateException(e); + } + } + + /** + * extract content that match pattern given by a regular expression from a given node + * @param aXpathExprJson + * @param aInput + * @param aRegExpression + * @return nodeList + */ + public synchronized NodeList identifierExtract(String aXpathExprJson, Node aInput, String aRegExpression){ + String xpathExprJson = StringEscapeUtils.unescapeXml(aXpathExprJson); + log.debug("unescape xpathExprJson: " + xpathExprJson); + String regExpression = StringEscapeUtils.unescapeXml(aRegExpression); + log.debug("unescape regExpr" + regExpression); + + try{ + List xpathExprList = JSONParser.defaultJSONParser().parse(List.class, xpathExprJson); + + // workaround: rewrap, why ? + DOMSource s = new DOMSource(aInput); + StringWriter w = new StringWriter(); + Result r = new StreamResult(w); + transformer.transform(s, r); + Document doc = docBuilder.parse(new InputSource(new StringReader(w.toString()))); + + return identifierExtractFunction.extract(xpathExprList, doc, regExpression, docBuilder.newDocument(), xpath); + }catch(Exception e){ + log.fatal("identifierExtract failed for node value: " + aInput.getNodeValue(), e); + throw new IllegalStateException(e.getMessage()); + } + } + + /** + * @param key + * @param resultsFunction_getvalue + */ + public void setResults(String key, FunctionResults resultsFunction_getvalue) { + mapOfResults.put(key, resultsFunction_getvalue); + } + + /** + * @param convertFunction the convertFunction to set + */ + public void setConvertFunction(Convert convertFunction) { + this.convertFunction = convertFunction; + } + + /** + * @return the convertFunction + */ + public Convert getConvertFunction() { + return convertFunction; + } + + /** + * @return the lookupRecord + */ + public LookupRecord getLookupRecord() { + return lookupRecord; + } + + /** + * @param lookupRecord the lookupRecord to set + */ + public void setLookupRecord(LookupRecord lookupRecord) { + this.lookupRecord = lookupRecord; + } + 
/**
 * Ordered store of function results.
 * <p>
 * Every call to one of the {@code add} methods appends one entry; entries are
 * addressed by the zero-based order in which they were added. An entry is a
 * list of values, addressed by a one-based position.
 *
 * @author js
 */
public class FunctionResults {

    // keyed by the decimal string of the insertion index: "0", "1", ...
    private Map<String, List<String>> resultMap = new LinkedHashMap<String, List<String>>();

    /**
     * get the first single result from the result list at the given index
     * @param aIndex zero-based index of the entry
     * @return a result
     * @throws IndexOutOfBoundsException if no entry exists at the index or the entry is empty
     */
    public String get(int aIndex) {
        return resultsAt(aIndex).get(0);
    }

    /**
     * get the single result for the node at the given position from the list at the given index
     * @param aIndex zero-based index of the entry
     * @param aPosition one-based position inside the entry
     * @return a result
     * @throws IllegalArgumentException if the position is not greater than 0
     * @throws IndexOutOfBoundsException if no entry exists at the index or the position exceeds the entry
     */
    public String get(int aIndex, int aPosition) {
        if (aPosition <= 0) {
            throw new IllegalArgumentException("position is " + aPosition + ", must be greater 0");
        }
        return resultsAt(aIndex).get(aPosition - 1);
    }

    /**
     * add a collection containing the results for each record; every element becomes its own entry
     * @param aCollection the results to append
     */
    public void addAll(Collection<String> aCollection) {
        for (String result : aCollection) {
            add(result);
        }
    }

    /**
     * add a single result calculated for a record node
     * @param aResult the result, stored as a one-element entry
     */
    public void add(String aResult) {
        List<String> resultList = new LinkedList<String>();
        resultList.add(aResult);
        add(resultList);
    }

    /**
     * add a list of results calculated for all resp. record nodes
     * @param aResults the entry to append; the list is stored as given, not copied
     */
    public void add(List<String> aResults) {
        resultMap.put(String.valueOf(resultMap.size()), aResults);
    }

    /** Looks up the entry at the index and fails with a descriptive exception when it is absent. */
    private List<String> resultsAt(int aIndex) {
        List<String> results = resultMap.get(String.valueOf(aIndex));
        if (results == null) {
            throw new IndexOutOfBoundsException("no results at index " + aIndex + ", number of entries is " + resultMap.size());
        }
        return results;
    }
}
private RegularExpression regExprFunction; + private TransformationFunctionProxy functionProxy; + private SAXReader reader = new SAXReader(); + private Map nsMap = new HashMap(); + + /** + * pre-process output values from object records using a function call + * @param aFunctionCall function call object + * @param aObjectRecords list of object records + * @param aNamespaceMap map of namespace prefixes and uris + */ + public void preprocess( + FunctionCall aFunctionCall, + List aObjectRecords, + Map aNamespaceMap, + Map aStaticResults, + Map aJobProperties, + Map aVarRules){ + this.nsMap = aNamespaceMap; + FunctionResults functionResults = new FunctionResults(); + + try { + if (aFunctionCall.getExternalFunctionName().equals("extract")){ + String featureName = aFunctionCall.getParameters().get(Extract.paramNameFeature); + functionResults.addAll(extractFunction.execute(aObjectRecords, featureName)); + }else{ + for (String objRecord: aObjectRecords){ + String result = null; + if (aFunctionCall.getExternalFunctionName().equals("convert")){ + if (aFunctionCall.isStatic() && aStaticResults.containsKey(aFunctionCall.getUuid())){ + functionResults.add(aStaticResults.get(aFunctionCall.getUuid())); + }else{ + String vocabName = aFunctionCall.getParameters().get(Convert.paramVocabularyName); + String fieldExpr = aFunctionCall.getParameters().get(Convert.paramFieldValue); + List recordValues = getValuesFromRecord(objRecord, fieldExpr); + if (aFunctionCall.isStatic()) + aStaticResults.put(aFunctionCall.getUuid(), convertFunction.executeSingleValue(vocabName, recordValues)); + else + functionResults.add(convertFunction.executeAllValues(vocabName, recordValues)); + } + }else if (aFunctionCall.getExternalFunctionName().equals("getValue")){ + if (aFunctionCall.isStatic() && aStaticResults.containsKey(aFunctionCall.getUuid())) + functionResults.add(aStaticResults.get(aFunctionCall.getUuid())); + else{ + String functionName = 
aFunctionCall.getParameters().get(RetrieveValue.paramFunctionName); + result = retrieveFunction.executeSingleValue(functionName, aFunctionCall.getArguments(), objRecord, nsMap); + functionResults.add(result); + if (aFunctionCall.isStatic()) + aStaticResults.put(aFunctionCall.getUuid(), result); + } + }else if (aFunctionCall.getExternalFunctionName().equals("regExpr")){ + // TODO + if (aFunctionCall.isStatic() && aStaticResults.containsKey(aFunctionCall.getUuid())){ + //log.debug("static functioncal; static result exist to compute regexpr: " + aFunctionCall.getParameters().get(RegularExpression.paramRegularExpr)); +// functionResults.add(aStaticResults.get(aFunctionCall.getUuid())); + }else{ +// log.debug("static functioncal to compute regexpr: " + aFunctionCall.getParameters().get(RegularExpression.paramRegularExpr)); + String regularExpression = aFunctionCall.getParameters().get(RegularExpression.paramRegularExpr); //.replaceAll("'", ""); + String expression1 = aFunctionCall.getParameters().get(RegularExpression.paramExpr1); + List recordValues = null; + // distinguish xpath-expr, jobConst, var +// log.debug("expression1: " + expression1); + + if (aJobProperties.containsKey(expression1)){ + recordValues = new LinkedList(); + recordValues.add(aJobProperties.get(expression1)); + }else{ + recordValues = getValuesFromRecord(objRecord, expression1); + } + + String expression2 = aFunctionCall.getParameters().get(RegularExpression.paramExpr2); + String replacement = ""; + if (aJobProperties.containsKey(expression2)){ + replacement = aJobProperties.get(expression2); + }else if (aVarRules.containsKey(expression2)){ + Rules varRule = (Rules)aVarRules.get(expression2); + replacement = varRule.getConstant().replace("'", ""); // currently limited to constant rules. 
+ }else { + replacement = getValuesFromRecord(objRecord, expression2).get(0); // get the first available value + } + List regExprResults = new LinkedList(); + for (String fieldValue: recordValues){ + try { + int lastSlash = regularExpression.lastIndexOf("/"); + String trailingOptions = regularExpression.substring(lastSlash); + int replacementSlash = regularExpression.substring(0, lastSlash).lastIndexOf("/"); + String replacementFromExpression = regularExpression.substring(replacementSlash + 1, lastSlash); + String newRegExpr = regularExpression.substring(0, replacementSlash + 1) + replacement + replacementFromExpression + trailingOptions; // ??? + result = regExprFunction.executeSingleValue(newRegExpr, fieldValue, replacement); + regExprResults.add(result); + } catch (ProcessingException e) { + throw new IllegalStateException(e); + } +// regExprResults.add(regExprFunction.executeSingleValue(regularExpression, fieldValue, expression2)); + } + functionResults.add(regExprResults); + // assuming 1 result only + if (aFunctionCall.isStatic()){ + aStaticResults.put(aFunctionCall.getUuid(), result); + } + + // unsupported +// if (aFunctionCall.isStatic()){ +// aStaticResults.put(aFunctionCall.getUuid(), result); +// } + } + } + } + } + } catch (ProcessingException e) { + throw new IllegalStateException(e); + } catch (DocumentException e) { + throw new IllegalStateException(e); + } + functionProxy.setResults(aFunctionCall.getUuid(), functionResults); + } + + public void setFunctionProxy(TransformationFunctionProxy functionProxy) { + this.functionProxy = functionProxy; + } + + public TransformationFunctionProxy getFunctionProxy() { + return functionProxy; + } + + public void setConvertFunction(Convert convertFunction) { + this.convertFunction = convertFunction; + } + + public Convert getConvertFunction() { + return convertFunction; + } + + /** + * @param retrieveFunction the retrieveFunction to set + */ + public void setRetrieveFunction(RetrieveValue retrieveFunction) { + 
this.retrieveFunction = retrieveFunction; + } + + /** + * @return the retrieveFunction + */ + public RetrieveValue getRetrieveFunction() { + return retrieveFunction; + } + + /** + * @return the regExprFunction + */ + public RegularExpression getRegExprFunction() { + return regExprFunction; + } + + /** + * @param regExprFunction the regExprFunction to set + */ + public void setRegExprFunction(RegularExpression regExprFunction) { + this.regExprFunction = regExprFunction; + } + + /** + * @param extractFunction the extractFunction to set + */ + public void setExtractFunction(Extract extractFunction) { + this.extractFunction = extractFunction; + } + + /** + * @return the extractFunction + */ + public Extract getExtractFunction() { + return extractFunction; + } + + /** + * evaluate given XPath Expr applied on a record and return the values as a list of strings + * @param record + * @param xpathExpr + * @return list of strings + * @throws DocumentException + */ + @SuppressWarnings("unchecked") + private List getValuesFromRecord(String record, String xpathExpr) throws DocumentException{ + List values = new LinkedList(); + Document doc = reader.read(new StringReader(record)); + XPath xpath = DocumentHelper.createXPath(xpathExpr); + xpath.setNamespaceURIs(nsMap); + Object context = xpath.evaluate(doc); + if (context instanceof String) + values.add((String)context); + else if (context instanceof List) + for (Node node: (List)context) + values.add(node.getText()); + else if (context instanceof Node) + values.add( ((Node)context).getText()); + else if (context instanceof Number) + values.add( ((Number)context).intValue() + ""); + return values; + } + +} diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/SimpleTransformationEngine.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/SimpleTransformationEngine.java new file mode 100644 index 0000000..2d1d76f --- /dev/null +++ 
b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/SimpleTransformationEngine.java @@ -0,0 +1,409 @@ +package eu.dnetlib.data.collective.transformation.engine; + +import java.io.StringReader; +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; + +import javax.xml.xpath.XPath; +import javax.xml.xpath.XPathConstants; +import javax.xml.xpath.XPathExpressionException; +import javax.xml.xpath.XPathFactory; + +import net.sf.saxon.instruct.TerminationException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.springframework.core.io.Resource; +import org.w3c.dom.Node; +import org.xml.sax.InputSource; + +import eu.dnetlib.common.profile.ResourceDao; +import eu.dnetlib.data.collective.transformation.IDatabaseConnector; +import eu.dnetlib.data.collective.transformation.TransformationException; +import eu.dnetlib.data.collective.transformation.VocabularyRegistry; +import eu.dnetlib.data.collective.transformation.core.xsl.ext.TransformationFunctionProxy; +import eu.dnetlib.data.collective.transformation.engine.core.ITransformation; +import eu.dnetlib.data.collective.transformation.engine.functions.Convert; +// import eu.dnetlib.data.collective.transformation.engine.functions.Dblookup; +import eu.dnetlib.data.collective.transformation.engine.functions.Extract; +import eu.dnetlib.data.collective.transformation.engine.functions.IFeatureExtraction; +import eu.dnetlib.data.collective.transformation.engine.functions.ProcessingException; +import eu.dnetlib.data.collective.transformation.engine.functions.RegularExpression; +import eu.dnetlib.data.collective.transformation.engine.functions.RetrieveValue; +import eu.dnetlib.data.collective.transformation.engine.functions.RetrieveValue.FUNCTION; +import eu.dnetlib.data.collective.transformation.rulelanguage.Argument; +import 
eu.dnetlib.data.collective.transformation.rulelanguage.Argument.Type; +import eu.dnetlib.data.collective.transformation.rulelanguage.util.FunctionCall; +import eu.dnetlib.data.collective.transformation.utils.BlacklistConsumer; + +/** + * @author jochen + * + */ +public class SimpleTransformationEngine { + + private static Log log = LogFactory.getLog(SimpleTransformationEngine.class); + private ITransformation transformation; + private VocabularyRegistry vocabularyRegistry; + private IDatabaseConnector databaseConnector; + private ResourceDao resourceDao; + private IFeatureExtraction featureExtraction; + private final List mdRecords = new LinkedList(); + private long totalTransformedRecords = 0; + private long totalIgnoredRecords = 0; + private String mappingFile; + private boolean stylesheetParamsCalculated = false; + private boolean preprocessingDone = false; + private Map stylesheetParams = new LinkedHashMap(); + private Resource blacklistApi; + private List blacklistedRecords = new LinkedList(); + + + /** + * execute any preprocessings declared in the transformation script prior starting the transformation of records + */ + public void preprocess(String dataSourceId) { + for (Map preprocMap : this.transformation.getRuleLanguageParser().getPreprocessings()) { + Iterator it = preprocMap.keySet().iterator(); + while (it.hasNext()) { + String function = it.next(); +// if (function.equals("dblookup")) { +// Dblookup fun = new Dblookup(); +// fun.setDbConnector(databaseConnector); +// try { +// log.debug("preprocessingMap value: " + preprocMap.get(function)); +// TransformationFunctionProxy.getInstance().setLookupRecord(fun.getResults(preprocMap.get(function))); +// } catch (Exception e) { +// log.debug(e.getMessage()); +// throw new IllegalStateException(e); +// } +// } + if (function.equals("blacklist")) { + BlacklistConsumer bc = new BlacklistConsumer(); + try{ + blacklistedRecords = bc.getBlackList(blacklistApi.getURL() + dataSourceId); + }catch(Exception e){ + 
throw new IllegalStateException("error in preprocess: " + e.getMessage()); + } + } + } + } + log.debug("preprocessing done."); + } + + /** + * check if blacklistedRecords exist and if so check if the current record is blacklisted by its objIdentifier + * @param aRecord + * @return + * @throws XPathExpressionException + * @throws ProcessingException + */ + private boolean isBlacklistRecord(String aRecord){ + if (blacklistedRecords.size() == 0) return false; + XPath xpath = XPathFactory.newInstance().newXPath(); + try{ + Node root = (Node) xpath.evaluate("/", new InputSource(new StringReader(aRecord)), XPathConstants.NODE); + String objId = xpath.evaluate("//*[local-name()='objIdentifier']", root); + if (blacklistedRecords.contains(objId)) return true; + }catch(Exception e){ + throw new IllegalStateException("error in isBlacklistRecord: " + e.getMessage()); + } + return false; + } + + /** + * transforms a source record + * + * @param sourceRecord + * the record to transform + * @return transformed record + */ + public String transform(final String sourceRecord) { + List objectRecords = new LinkedList(); + objectRecords.add(sourceRecord); + int index = 0; + mdRecords.clear(); + initTransformationFunction(); + + if (!stylesheetParamsCalculated) { + try{ + calculateStylesheetParams(sourceRecord); + }catch(Exception e){ + throw new IllegalStateException("error in calculateStylesheetParams" + e.getMessage()); + } + } + + if (!preprocessingDone){ + // xpath sourceRecord dataSourceid + preprocess(stylesheetParams.get("varBlacklistDataSourceId")); + preprocessingDone = true; + } + + if (isBlacklistRecord(sourceRecord)){ + try{ + mdRecords.add(transformation.transformRecord(sourceRecord, ITransformation.XSLSyntaxcheckfailed)); + }catch(Exception e){ + log.fatal(sourceRecord); + throw new IllegalStateException(e); + } + }else if (!transformation.getRuleLanguageParser().isXslStylesheet()) { + // iterate over all rules which are functionCalls + log.debug("functionCalls size: " + 
transformation.getRuleLanguageParser().getFunctionCalls().size()); + for (FunctionCall functionCall : transformation.getRuleLanguageParser().getFunctionCalls()) { + preprocess(objectRecords, functionCall); + } + for (String record : objectRecords) { + // log.debug(record); + try { + log.debug("now run transformation for record with index: " + index); + try{ + String transformedRecord = transformation.transformRecord(record, index); + mdRecords.add(transformedRecord); + } catch (TerminationException e){ + log.debug("record transformation terminated."); + String failedRecord = transformation.transformRecord(record, ITransformation.XSLSyntaxcheckfailed); + log.debug(failedRecord); + totalIgnoredRecords++; + mdRecords.add(failedRecord); + } + } catch (TransformationException e) { + log.error(sourceRecord); + throw new IllegalStateException(e); + } + index++; + } + } else { + for (String record : objectRecords) { + // test for init params and assign values + try { + log.debug("now run transformation for record with index: " + index); + try{ + String transformedRecord = transformation.transformRecord(record, stylesheetParams); + mdRecords.add(transformedRecord); + }catch(TerminationException e){ + String failedRecord = transformation.transformRecord(record, ITransformation.XSLSyntaxcheckfailed); + totalIgnoredRecords++; + log.debug(failedRecord); + mdRecords.add(failedRecord); + } + } catch (TransformationException e) { + log.error(sourceRecord); + throw new IllegalStateException(e); + } + index++; + } + } + + totalTransformedRecords = totalTransformedRecords + mdRecords.size(); + log.debug("objRecordSize: " + objectRecords.size() + ", mdRecordSize: " + mdRecords.size() + ", ignoredRecordSize: " + totalIgnoredRecords); + return mdRecords.get(0); + } + + private void calculateStylesheetParams(final String aRecord) throws XPathExpressionException, ProcessingException { + stylesheetParamsCalculated = true; + XPath xpath = XPathFactory.newInstance().newXPath(); + Node root = 
(Node) xpath.evaluate("/", new InputSource(new StringReader(aRecord)), XPathConstants.NODE); + String datasourcePrefix = xpath.evaluate("//*[local-name()='datasourceprefix']", root); + String profileXquery = "collection('/db/DRIVER/RepositoryServiceResources')//RESOURCE_PROFILE[.//EXTRA_FIELDS/FIELD[key=\"NamespacePrefix\"][value=\"" + datasourcePrefix + "\"]]"; + //String repositoryId = xpath.evaluate("//*[local-name()='repositoryId']", root); + log.debug("profileXquery: " + profileXquery); + // static $varDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", + // xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]); + RetrieveValue retrieveValue = new RetrieveValue(); + retrieveValue.setResourceDao(resourceDao); + List argList = new LinkedList(); + argList.add(new Argument(Type.VALUE, profileXquery)); + Argument argXpath = new Argument(Type.INPUTFIELD, "//OFFICIAL_NAME"); + argList.add(argXpath); + String varOfficialName = retrieveValue.executeSingleValue(FUNCTION.PROFILEFIELD.toString(), argList, null, new HashMap()); + stylesheetParams.put("varOfficialName", varOfficialName); + argList.remove(argXpath); + argXpath = new Argument(Type.INPUTFIELD, "//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"); + argList.add(argXpath); + String varDataSourceId = retrieveValue.executeSingleValue(FUNCTION.PROFILEFIELD.toString(), argList, null, new HashMap()); + stylesheetParams.put("varDataSourceId", varDataSourceId); + argList.remove(argXpath); + argXpath = new Argument(Type.INPUTFIELD, "//CONFIGURATION/DATASOURCE_TYPE"); + argList.add(argXpath); + String varDsType = retrieveValue.executeSingleValue(FUNCTION.PROFILEFIELD.toString(), argList, null, new HashMap()); + stylesheetParams.put("varDsType", varDsType); + argList.remove(argXpath); + + // if blacklist + for (Map preprocMap : this.transformation.getRuleLanguageParser().getPreprocessings()) { + Iterator it = preprocMap.keySet().iterator(); + while (it.hasNext()) { + String function = 
it.next(); + if (function.equals("blacklist")) { + argXpath = new Argument(Type.INPUTFIELD, preprocMap.get(function)); // blacklistDataSourceIdXpath + argList.add(argXpath); + String varBlacklistDataSourceId = retrieveValue.executeSingleValue(FUNCTION.PROFILEFIELD.toString(), argList, null, new HashMap()); + stylesheetParams.put("varBlacklistDataSourceId", varBlacklistDataSourceId); + argList.remove(argXpath); + } + } + } + } + + private void initTransformationFunction() { + if (this.vocabularyRegistry == null) { throw new IllegalStateException("vocabularyReg is null"); } + Convert convertFunction = new Convert(); + convertFunction.setVocabularyRegistry(this.vocabularyRegistry); + TransformationFunctionProxy.getInstance().setConvertFunction(convertFunction); + + } + + /** + * preprocesses function if function is configured resp. + * + * @param records + * list of object records + * @param aFunctionCall + */ + private void preprocess(final List records, final FunctionCall aFunctionCall) { + try { + log.debug("preprocess"); + if (transformation.getRuleLanguageParser() == null) { throw new IllegalStateException("rulelanguageparser not initialised"); } + if (transformation.getRuleLanguageParser().getNamespaceDeclarations() == null) { throw new IllegalStateException("nsDecl is null"); } + PreProcessor preProc = new PreProcessor(); + preProc.setConvertFunction(TransformationFunctionProxy.getInstance().getConvertFunction()); + RetrieveValue retrieveValue = new RetrieveValue(); + retrieveValue.setResourceDao(resourceDao); + preProc.setRetrieveFunction(retrieveValue); + RegularExpression regExpr = new RegularExpression(); + preProc.setRegExprFunction(regExpr); + TransformationFunctionProxy functionProxy = TransformationFunctionProxy.getInstance(); + preProc.setFunctionProxy(functionProxy); + Extract extractFunction = new Extract(); + extractFunction.setFeatureExtraction(featureExtraction); + preProc.setExtractFunction(extractFunction); + if (aFunctionCall.doPreprocess() || 
aFunctionCall.isStatic()) { + // log.debug("now call preprocess with: " + aFunctionCall.getExternalFunctionName() + " " + aFunctionCall.getUuid()); + preProc.preprocess( + aFunctionCall, + records, + transformation.getRuleLanguageParser().getNamespaceDeclarations(), + transformation.getStaticTransformationResults(), + transformation.getJobProperties(), + transformation.getRuleLanguageParser().getVariableMappingRules()); + // log.debug("preprocess end"); + } else { + log.debug("skip preprocessing for function: " + aFunctionCall.getExternalFunctionName()); + } + + } catch (Exception e) { + throw new IllegalStateException(e); + } + + } + + /** + * @param transformation + * the transformation to set + */ + public void setTransformation(final ITransformation transformation) { + this.transformation = transformation; + } + + /** + * @return the transformation + */ + public ITransformation getTransformation() { + return transformation; + } + + /** + * @param vocabularyRegistry + * the vocabularyRegistry to set + */ + public void setVocabularyRegistry(final VocabularyRegistry vocabularyRegistry) { + this.vocabularyRegistry = vocabularyRegistry; + } + + /** + * @return the vocabularyRegistry + */ + public VocabularyRegistry getVocabularyRegistry() { + return vocabularyRegistry; + } + + /** + * @return the resourceDao + */ + public ResourceDao getResourceDao() { + return resourceDao; + } + + /** + * @param resourceDao + * the resourceDao to set + */ + public void setResourceDao(final ResourceDao resourceDao) { + this.resourceDao = resourceDao; + } + + /** + * @param featureExtraction + * the featureExtraction to set + */ + public void setFeatureExtraction(final IFeatureExtraction featureExtraction) { + this.featureExtraction = featureExtraction; + } + + /** + * @return the featureExtraction + */ + public IFeatureExtraction getFeatureExtraction() { + return featureExtraction; + } + + /** + * @return the databaseConnector + */ + public IDatabaseConnector getDatabaseConnector() 
{ + return databaseConnector; + } + + /** + * @param databaseConnector + * the databaseConnector to set + */ + public void setDatabaseConnector(final IDatabaseConnector databaseConnector) { + this.databaseConnector = databaseConnector; + } + + public long getTotalTransformedRecords() { + return this.totalTransformedRecords; + } + + public long getTotalIgnoredRecords() { + return this.totalIgnoredRecords; + } + + /** + * @return the mappingFile + */ + public String getMappingFile() { + return mappingFile; + } + + /** + * @param mappingFile + * the mappingFile to set + */ + public void setMappingFile(final String mappingFile) { + this.mappingFile = mappingFile; + } + + public Resource getBlacklistApi() { + return blacklistApi; + } + + public void setBlacklistApi(Resource blacklistApi) { + this.blacklistApi = blacklistApi; + } +} diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/core/ITransformation.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/core/ITransformation.java new file mode 100644 index 0000000..0e7348e --- /dev/null +++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/core/ITransformation.java @@ -0,0 +1,54 @@ +package eu.dnetlib.data.collective.transformation.engine.core; + +import java.util.Map; +import java.util.Properties; + +import net.sf.saxon.instruct.TerminationException; +import eu.dnetlib.data.collective.transformation.TransformationException; +import eu.dnetlib.data.collective.transformation.rulelanguage.RuleLanguageParser; + +/** + * @author jochen + * + */ +public interface ITransformation { + + public static final String JOBCONST_DATASINKID = "$job.datasinkid"; + public static final String XSLSyntaxcheckfailed = "syntaxcheckfailed.xsl"; + + /** + * transforms a single record + * + * @param aRecord the record to transform + * @param aIndex + * @return the transformed record + * @throws TerminationException, 
TransformationServiceException + */ + public String transformRecord(String aRecord, int aIndex) throws TerminationException, TransformationException; + + /** + * transforms a single record whyle applying a stylesheet + * @param aRecord + * @param aStylesheet + * @return + * @throws TransformationException + */ + public String transformRecord(String aRecord, String aStylesheet) throws TransformationException; + + public String transformRecord(String aRecord, Map aStylesheetParams) throws TerminationException, TransformationException; + + /** + * @return the rule language parser + */ + public RuleLanguageParser getRuleLanguageParser(); + + public Map getStaticTransformationResults(); + + public Map getJobProperties(); + + /** + * get log information that was recorded during transformation + * @return properties + */ + public Properties getLogInformation(); +} diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/core/StylesheetBuilder.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/core/StylesheetBuilder.java new file mode 100644 index 0000000..609b413 --- /dev/null +++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/core/StylesheetBuilder.java @@ -0,0 +1,335 @@ +package eu.dnetlib.data.collective.transformation.engine.core; + +import java.io.StringReader; +import java.io.StringWriter; +import java.util.LinkedHashMap; +import java.util.LinkedList; +import java.util.Map; +import java.util.Queue; +import java.util.Set; + +import javax.xml.namespace.NamespaceContext; +import javax.xml.transform.OutputKeys; +import javax.xml.transform.Result; +import javax.xml.transform.Source; +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerFactory; +import javax.xml.transform.stream.StreamResult; +import javax.xml.transform.stream.StreamSource; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + 
+import eu.dnetlib.data.collective.transformation.core.schema.SchemaElement; +import eu.dnetlib.data.collective.transformation.core.schema.SchemaInspector; +import eu.dnetlib.data.collective.transformation.core.xsl.XslConstructor; +import eu.dnetlib.data.collective.transformation.core.xsl.XsltConstants; +import eu.dnetlib.data.collective.transformation.core.xsl.XslElement; +import eu.dnetlib.data.collective.transformation.rulelanguage.IRule; +import eu.dnetlib.data.collective.transformation.rulelanguage.RuleLanguageParser; +import eu.dnetlib.data.collective.transformation.rulelanguage.Rules; +import eu.dnetlib.data.collective.transformation.rulelanguage.util.Converter; + +/** + * @author jochen + * + */ +public class StylesheetBuilder { + + private static final Log log = LogFactory.getLog(StylesheetBuilder.class); + private SchemaInspector schemaInspector; + private RuleLanguageParser ruleLanguageParser; + private NamespaceContext namespaceContext; + // implicit rule for deleted records + private final String elementNameIndicatingDeletedRecords = "header"; + private final String attributeNameIndicatingDeletedRecords = "status"; + private final String attributeValueIndicatingDeletedRecords = "deleted"; + private final String elementNameAbout = "about"; + + public String createTemplate(){ + if (schemaInspector == null || ruleLanguageParser == null || namespaceContext == null){ + throw new IllegalStateException("StylesheetBuidler is not initialized with schemaInspector or ruleLanguageParser or namespaceContext."); + } + if (!schemaInspector.isInspected()){ + throw new IllegalStateException("schemaInspector must first inspect in order to create a stylesheet."); + } + StringBuilder builder = new StringBuilder(); + XslElement templateRoot = new XslElement("templateroot"); + templateRoot.addBoundPrefix(XsltConstants.nsXsl); + templateRoot.addAllBoundPrefixes(Converter.getBoundPrefixes(this.ruleLanguageParser.getNamespaceDeclarations())); + XslElement template = new 
XslElement(XsltConstants.template); + template.addAttribute("match", "/"); + Map> ruleMapping = this.ruleLanguageParser.getElementMappingRules(); + Map variableRuleMapping = this.ruleLanguageParser.getVariableMappingRules(); + Map templateRuleMapping = this.ruleLanguageParser.getTemplateMappingRules(); + Queue templateQueue = new LinkedList(); + XslElement rootField = new XslElement(schemaInspector.getRootElement()); + XslConstructor xslConstructor = new XslConstructor(); + + /** + * * + */ +// Iterator keyIterator = ruleMapping.keySet().iterator(); +// while(keyIterator.hasNext()){ +// System.out.println("stylesheetbuilder: key: " + keyIterator.next()); +// } + /* + * + */ + + int templateCounter = 1; + int standaloneTemplateCounter = 1; + // write variables at the beginning of the templateRoot + for (String variable: variableRuleMapping.keySet()){ + IRule currentVariableRule = variableRuleMapping.get(variable); + templateRoot.addEnclosedElements(xslConstructor.writeOutVariableRule((Rules)currentVariableRule)); + } + Map targetFieldTemplateMap = new LinkedHashMap(); + for (String keyTemplate: templateRuleMapping.keySet()){ + IRule currentTemplateRule = templateRuleMapping.get(keyTemplate); + targetFieldTemplateMap.put(((Rules)currentTemplateRule).getFunctionCall().getParameters().get("elementName"), "templName" + standaloneTemplateCounter); + templateRoot.addEnclosedElements(xslConstructor.writeOutRecursiveTemplate((Rules)currentTemplateRule , "templName" + (standaloneTemplateCounter++), this.ruleLanguageParser.getNamespaceDeclarations()).asXml()); + } + XslElement chooseField = new XslElement(XsltConstants.choose); + XslElement whenField = new XslElement(XsltConstants.when, + "test", "//" + this.elementNameIndicatingDeletedRecords + "/@" + this.attributeNameIndicatingDeletedRecords + "='" + this.attributeValueIndicatingDeletedRecords + "'"); + XslElement otherwiseField = new XslElement(XsltConstants.otherwise); + String templateAboutName = "applyAbout"; + + // 
write schema elements + for (SchemaElement element: schemaInspector.getChildElements()){ + if (!element.containsSimpleType()){ + String complexTypeTemplateName = "apply" + (templateCounter++); + XslElement complexTypeTemplate = new XslElement(XsltConstants.template, "name", complexTypeTemplateName); + // will contain only other elements + XslElement childField = new XslElement(XsltConstants.element, "name", element.getName()); + if (element.getName().equals(this.elementNameIndicatingDeletedRecords)){ + XslElement ifField = new XslElement(XsltConstants.ifCondition, "test", "//" + this.elementNameIndicatingDeletedRecords + "/@" + this.attributeNameIndicatingDeletedRecords); + XslElement attributeField = new XslElement(XsltConstants.attribute, "name", this.attributeNameIndicatingDeletedRecords); + XslElement valueofField = new XslElement(XsltConstants.valueOf, "select", "//" + this.elementNameIndicatingDeletedRecords + "/@" + this.attributeNameIndicatingDeletedRecords); + attributeField.addEnclosedElements(valueofField.asXml()); + ifField.addEnclosedElements(attributeField.asXml()); + childField.addEnclosedElements(ifField.asXml()); + } + + if (element.getName().equals(this.elementNameAbout)){ + XslElement templateAbout = new XslElement(XsltConstants.template, "name", templateAboutName); + templateAbout.addEnclosedElements(xslConstructor.writeOutApplyAbout()); + templateQueue.add(templateAbout.asXml()); + } + + for (SchemaElement childElement: element.getChildList()){ + String currentKey = getPrefixedElementName(childElement); + if (childElement.containsSimpleType()){ + log.debug("currentKey: " + currentKey); + if (ruleMapping.containsKey(currentKey)){ + for (IRule currentRule: ruleMapping.get(currentKey)){ + if (currentRule instanceof Rules){ + log.debug(" has Set? " + ((Rules)currentRule).hasSet() ); + log.debug(" has Condition? 
" + ((Rules)currentRule).hasCondition() ); + + if ( !((Rules)currentRule).hasCondition()){ + if ( !((Rules)currentRule).hasSet() ){ + if (currentRule.definesTemplateMatch()){ + String templateName = ((Rules)currentRule).getTemplateMatch(); + XslElement subTemplate = new XslElement(XsltConstants.template, "match", templateName); + subTemplate.addEnclosedElements(xslConstructor.writeOutRuleCopy((Rules)currentRule, currentKey)); + templateQueue.add(subTemplate.asXml()); + childField.addEnclosedElements(xslConstructor.writeOutApplyTemplates(((Rules)currentRule).getProperties().getProperty("applyTemplateSelectExpression"))); + }else{ + childField.addEnclosedElements(xslConstructor.writeOutRule((Rules)currentRule, currentKey)); + } + }else{ + childField.addEnclosedElements(xslConstructor.writeOutRuleComplex((Rules)currentRule, currentKey)); + } + }else{ + // has condition + + if ( ((Rules)currentRule).getCondition().isPrimary((Rules)currentRule) && + ((Rules)currentRule).getUniqueName().equals( ((Rules)currentRule).getCondition().getSecondaryRule().getUniqueName() ) ){ + + if ( ((Rules)currentRule).getCondition().getApplyExpression() != null ){ + String templateName = "apply" + (templateCounter++); + XslElement subTemplate = new XslElement(XsltConstants.template, "name", templateName); + subTemplate.addEnclosedElements(xslConstructor.writeOutApplyConditionalTemplateChoose((Rules)currentRule)); + templateQueue.add(subTemplate.asXml()); + childField.addEnclosedElements(xslConstructor.writeOutCallTemplate(templateName)); + }else{ + // a condition with alternative rules for the same output elements + childField.addEnclosedElements(xslConstructor.writeOutConditionalChoose((Rules) currentRule)); + } + }else if ( ! 
((Rules)currentRule).getCondition().getPrimaryRule().getUniqueName().equals( ((Rules)currentRule).getCondition().getSecondaryRule().getUniqueName() ) ){ + // a condition with alternative rules for distinct output elements + if ( ((Rules)currentRule).getCondition().getApplyExpression() != null ){ + // has apply expression + String templateName = "apply" + (templateCounter++); + XslElement subTemplate = new XslElement(XsltConstants.template, "name", templateName); + subTemplate.addEnclosedElements(xslConstructor.writeOutApplyConditionalTemplateIf((Rules)currentRule, false)); + templateQueue.add(subTemplate.asXml()); + childField.addEnclosedElements(xslConstructor.writeOutCallTemplate(templateName)); + }else{ + childField.addEnclosedElements(xslConstructor.writeOutConditionalIf((Rules) currentRule)); + } + } + } + }else{ + // only Rules instances are supported + } + } + }else if (targetFieldTemplateMap.containsKey(currentKey)){ + childField.addEnclosedElements(xslConstructor.writeOutCallTemplate(targetFieldTemplateMap.get(currentKey))); + }else{ + + // no rule defined, check if element is mandatory + if (childElement.isMandatory()){ + XslElement emptyField = new XslElement(currentKey); + childField.addEnclosedElements(emptyField.asXml()); + } + } + }else{ + // complex-type elements + + if (ruleMapping.containsKey(currentKey)){ + for (IRule currentRule: ruleMapping.get(currentKey)){ + if (currentRule instanceof Rules){ + if ( !((Rules)currentRule).hasCondition() ){ + log.debug("stylesheetbuilder.complexType NO CONDITION: " + childElement.getName()); + childField.addEnclosedElements(xslConstructor.writeOutRuleComplex((Rules)currentRule, currentKey)); + }else{ + // log.debug("stylesheetbuilder.complexType HAS CONDITION: ONLY PARTLY IMPLEMENTED !!!!!!!!!!"); + + if ( ((Rules)currentRule).getCondition().isPrimary((Rules)currentRule) && + ((Rules)currentRule).getUniqueName().equals( ((Rules)currentRule).getCondition().getSecondaryRule().getUniqueName() ) ){ + // 
log.debug("condition: complex rule with same output elements"); + if ( ((Rules)currentRule).getCondition().getApplyExpression() != null ){ + log.debug("APPLY expression rules for complex-type elements NOT YET SUPPORTED"); + // log.debug("complex rule with apply expression: NOT YET IMPLEMENTED !!!!!!!!!!!!"); + }else{ + // a condition with alternative rules for the same output elements + childField.addEnclosedElements(xslConstructor.writeOutConditionalChooseComplex((Rules) currentRule)); + } + }else if ( ! ((Rules)currentRule).getCondition().getPrimaryRule().getUniqueName().equals( + ((Rules)currentRule).getCondition().getSecondaryRule().getUniqueName() ) ){ + log.debug("CURRENTLY UNSUPPORTED!!!"); + // a condition with alternative rules for distinct output elements + if ( ((Rules)currentRule).getCondition().getApplyExpression() != null ){ + log.debug("APPLY expression rules for complex-type elements NOT YET SUPPORTED"); + + String templateName = "apply" + (templateCounter++); + XslElement subTemplate = new XslElement(XsltConstants.template, "name", templateName); + subTemplate.addEnclosedElements(xslConstructor.writeOutApplyConditionalTemplateIf((Rules)currentRule, true)); + templateQueue.add(subTemplate.asXml()); + childField.addEnclosedElements(xslConstructor.writeOutCallTemplate(templateName)); + +// // has apply expression +// String templateName = "apply" + (templateCounter++); +// XslElement subTemplate = new XslElement(XsltConstants.template, "name", templateName); +// subTemplate.addEnclosedElements(xslConstructor.writeOutApplyConditionalTemplateIf((Rules)currentRule)); +// templateQueue.add(subTemplate.asXml()); +// childField.addEnclosedElements(xslConstructor.writeOutCallTemplate(templateName)); + }else{ + childField.addEnclosedElements(xslConstructor.writeOutConditionalIfComplex((Rules) currentRule)); + } + } + } + } + } + } + } + } + if ( !(childField.isEmpty() && !element.isMandatory()) ){ + complexTypeTemplate.addEnclosedElements(childField.asXml()); 
+ templateQueue.add(complexTypeTemplate.asXml()); + + if (element.getName().equals(this.elementNameIndicatingDeletedRecords)){ + whenField.addEnclosedElements(xslConstructor.writeOutCallTemplate(complexTypeTemplateName)); + } + otherwiseField.addEnclosedElements(xslConstructor.writeOutCallTemplate(complexTypeTemplateName)); + +// rootField.addEnclosedElements(childField.asXml()); + } + + } + } + + chooseField.addEnclosedElements(whenField.asXml()); + otherwiseField.addEnclosedElements(xslConstructor.writeOutCallTemplate(templateAboutName)); + chooseField.addEnclosedElements(otherwiseField.asXml()); + rootField.addEnclosedElements(chooseField.asXml()); + template.addEnclosedElements(rootField.asXml()); + + templateRoot.addEnclosedElements(template.asXml()); + // add sub-templates from queue + for (String templateCode: templateQueue){ + templateRoot.addEnclosedElements(templateCode); + } + builder.append(templateRoot.asXml()); + log.debug(dumpStylesheetTemplate(builder.toString())); + return builder.toString(); + } + + String dumpStylesheetTemplate(String aTemplate){ + StringWriter w = new StringWriter(); + Source s = new StreamSource(new StringReader(aTemplate)); + Result r = new StreamResult(w); + Transformer t; + try { + t = TransformerFactory.newInstance().newTransformer(); + t.setOutputProperty(OutputKeys.METHOD, "xml"); + t.setOutputProperty(OutputKeys.INDENT, "yes"); + t.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2"); + t.transform(s, r); + } catch (Exception e) { + log.fatal(e); + } + return w.toString(); + } + + /** + * @param schemaInspector the schemaInspector to set + */ + public void setSchemaInspector(SchemaInspector schemaInspector) { + this.schemaInspector = schemaInspector; + } + + /** + * @return the schemaInspector + */ + public SchemaInspector getSchemaInspector() { + return schemaInspector; + } + + /** + * @return the ruleLanguageParser + */ + public RuleLanguageParser getRuleLanguageParser() { + return ruleLanguageParser; + 
} + + /** + * @param ruleLanguageParser the ruleLanguageParser to set + */ + public void setRuleLanguageParser(RuleLanguageParser ruleLanguageParser) { + this.ruleLanguageParser = ruleLanguageParser; + } + + /** + * @param namespaceContext the namespaceContext to set + */ + public void setNamespaceContext(NamespaceContext namespaceContext) { + this.namespaceContext = namespaceContext; + } + + /** + * @return the namespaceContext + */ + public NamespaceContext getNamespaceContext() { + return namespaceContext; + } + + + private String getPrefixedElementName(SchemaElement aElement){ + return ( this.namespaceContext.getPrefix(aElement.getTargetNamespace()) + ":" + aElement.getName() ); + } + +} diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/core/TransformationImpl.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/core/TransformationImpl.java new file mode 100644 index 0000000..72733c6 --- /dev/null +++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/core/TransformationImpl.java @@ -0,0 +1,353 @@ +package eu.dnetlib.data.collective.transformation.engine.core; + +import java.io.Reader; +import java.io.StringReader; +import java.io.StringWriter; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Properties; + +import javax.xml.transform.OutputKeys; +import javax.xml.transform.Source; +import javax.xml.transform.Templates; +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerConfigurationException; +import javax.xml.transform.TransformerException; +import javax.xml.transform.TransformerFactory; +import javax.xml.transform.stream.StreamResult; +import javax.xml.transform.stream.StreamSource; + +import net.sf.saxon.FeatureKeys; +import net.sf.saxon.instruct.TerminationException; + +import org.apache.commons.logging.Log; 
+import org.apache.commons.logging.LogFactory; +import org.dom4j.Document; +import org.dom4j.DocumentException; +import org.dom4j.DocumentHelper; +import org.dom4j.Element; +import org.dom4j.Node; +import org.dom4j.io.SAXReader; +import org.springframework.core.io.Resource; + +import eu.dnetlib.data.collective.transformation.TransformationException; +import eu.dnetlib.data.collective.transformation.core.schema.SchemaInspector; +import eu.dnetlib.data.collective.transformation.rulelanguage.RuleLanguageParser; +import eu.dnetlib.data.collective.transformation.rulelanguage.Rules; +import eu.dnetlib.data.collective.transformation.utils.NamespaceContextImpl; + +/** + * @author jochen + * + */ +public class TransformationImpl implements + ITransformation { + + private static final String rootElement = "record"; + private final Log log = LogFactory.getLog(TransformationImpl.class); + private Document xslDoc; + private SAXReader reader = new SAXReader(); + private Transformer transformer; + private Transformer transformerFailed; + protected RuleLanguageParser ruleLanguageParser; + private StylesheetBuilder stylesheetBuilder; + // cache static transformation results, valid for one transformation job + private Map staticResults = new LinkedHashMap(); + private Map jobConstantMap = new HashMap(); + + @javax.annotation.Resource(name="template") + private Resource template; + + private Resource schema; + + private Source xsltSyntaxcheckFailed; + + /** + * initializes the transformation with the underlying XSL-template + */ + public void init(){ + try { + xslDoc = reader.read(template.getInputStream()); + Resource xslResource = template.createRelative(XSLSyntaxcheckfailed); + String systemId = xslResource.getURL().toExternalForm(); + xsltSyntaxcheckFailed = new StreamSource(xslResource.getInputStream(), systemId); + + } catch (Throwable e) { + log.error("cannot initialize this transformation.", e); + throw new IllegalStateException(e); + } + + } + + public void 
addJobConstant(String aKey, String aValue){ + this.jobConstantMap.put(aKey, aValue); + } + + /** + * creates a new Transformer object using a stylesheet based on the transformation rules + */ + public void configureTransformation()throws TransformerConfigurationException{ + final List errorList = new ArrayList(); + + javax.xml.transform.ErrorListener listener = new javax.xml.transform.ErrorListener() { + + @Override + public void warning(TransformerException exception) throws TransformerException { + // TODO Auto-generated method stub + + } + + @Override + public void fatalError(TransformerException exception) throws TransformerException { + // TODO Auto-generated method stub + errorList.add(exception); + throw exception; + } + + @Override + public void error(TransformerException exception) throws TransformerException { + // TODO Auto-generated method stub + + } + }; + + TransformerFactory factory = TransformerFactory.newInstance(); + factory.setAttribute(FeatureKeys.ALLOW_EXTERNAL_FUNCTIONS, Boolean.TRUE); + factory.setErrorListener(listener); + Templates templates = null; + try{ + if (this.ruleLanguageParser.isXslStylesheet()){ + templates = factory.newTemplates(new StreamSource(new StringReader(ruleLanguageParser.getXslStylesheet()))); + }else{ + templates = factory.newTemplates(new StreamSource(createStylesheet())); + } + + transformer = templates.newTransformer(); + //((net.sf.saxon.Controller)transformer).setMessageEmitter(mw); + transformer.setOutputProperty(OutputKeys.INDENT, "yes"); + transformer.setOutputProperty(OutputKeys.METHOD, "xml"); + transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2"); + + Templates templateFailed = factory.newTemplates(xsltSyntaxcheckFailed); + transformerFailed = templateFailed.newTransformer(); + }catch(TransformerConfigurationException e){ + if (!errorList.isEmpty()) { + System.out.println(errorList.get(0).getMessageAndLocation()); // todo it seems the location information is not yet correct + throw 
new TransformerConfigurationException(errorList.get(0).getMessageAndLocation()); + }else{ + throw e; + } + } + + //((net.sf.saxon.Controller)transformerFailed).setMessageEmitter(mw); + } + + /* (non-Javadoc) + * @see eu.dnetlib.data.collective.transformation.engine.core.ITransformation#transformRecord(java.lang.String, int) + */ + public String transformRecord(String record, int index)throws TerminationException, TransformationException{ + try { + StreamSource s = new StreamSource(new StringReader(record)); + StringWriter writer = new StringWriter(); + StreamResult r = new StreamResult(writer); + transformer.setParameter("index", index); + transformer.transform(s , r); + return writer.toString(); + }catch (TerminationException e) { + log.debug(e.getLocalizedMessage()); + throw e; + } catch (TransformerException e) { + log.error(e); + throw new TransformationException(e); + } + } + + public String transformRecord(String record, Map parameters) throws TerminationException, TransformationException{ + try { + StreamSource s = new StreamSource(new StringReader(record)); + StringWriter writer = new StringWriter(); + StreamResult r = new StreamResult(writer); + for (String key: parameters.keySet()){ + transformer.setParameter(key, parameters.get(key)); + } + transformer.transform(s , r); + return writer.toString(); + }catch (TerminationException e){ + log.debug(e.getLocalizedMessage()); + throw e; + } catch (TransformerException e) { + log.error(e); + throw new TransformationException(e); + } + } + + public String transformRecord(String record, String stylesheetName) throws TransformationException{ + if (!stylesheetName.equals(XSLSyntaxcheckfailed)) + throw new IllegalArgumentException("in TransformationImpl: stylesheetname " + stylesheetName + " is unsupported!" 
); + try{ + StreamSource s = new StreamSource(new StringReader(record)); + StringWriter w = new StringWriter(); + StreamResult r = new StreamResult(w); + transformerFailed.transform(s, r); + return w.toString(); + }catch (TransformerException e){ + log.error(e); + throw new TransformationException(e); + } + } + + public String dumpStylesheet(){ + return xslDoc.asXML(); + +// StringWriter writer = new StringWriter(); +// try { +// Transformer tXsl = transformer; //.newTransformer(); +// tXsl.setOutputProperty(OutputKeys.INDENT, "yes"); +// tXsl.setOutputProperty(OutputKeys.METHOD, "xml"); +// tXsl.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2"); +// +// StreamResult r = new StreamResult(writer); +// Source s = new StreamSource(new StringReader(xslDoc.asXML())); +// tXsl.transform(s, r); +// } catch (TransformerException e) { +// // TODO Auto-generated catch block +// e.printStackTrace(); +// } +// return writer.toString(); + } + + + /** + * sets the XSL template + * @param template - resource to access the XSL template + */ + public void setTemplate(Resource template) { + this.template = template; + } + + /** + * @return the resource to access the XSL template + */ + public Resource getTemplate() { + return template; + } + + public void setRuleLanguageParser(RuleLanguageParser ruleLanguageParser) { + this.ruleLanguageParser = ruleLanguageParser; + } + + public RuleLanguageParser getRuleLanguageParser() { + return ruleLanguageParser; + } + + /** + * @param stylesheetBuilder the stylesheetBuilder to set + */ + public void setStylesheetBuilder(StylesheetBuilder stylesheetBuilder) { + this.stylesheetBuilder = stylesheetBuilder; + } + + /** + * @return the stylesheetBuilder + */ + public StylesheetBuilder getStylesheetBuilder() { + return stylesheetBuilder; + } + + /** + * @return the transformation rules as String object + */ + protected String getTransformationRules(){ + // add job-properties to the rules as variables + for (String key: 
jobConstantMap.keySet()){ + Rules r = new Rules(); + r.setVariable(key); + r.setConstant("'" + jobConstantMap.get(key) + "'"); + ruleLanguageParser.getVariableMappingRules().put(JOBCONST_DATASINKID, r); + } + if (this.stylesheetBuilder == null){ + // create DMF compliant stylesheet builder + this.stylesheetBuilder = new StylesheetBuilder(); + this.stylesheetBuilder.setRuleLanguageParser(this.ruleLanguageParser); + NamespaceContextImpl namespaceContext = new NamespaceContextImpl(); + for (String prefix: ruleLanguageParser.getNamespaceDeclarations().keySet()){ + namespaceContext.addNamespace(prefix, ruleLanguageParser.getNamespaceDeclarations().get(prefix)); + } + SchemaInspector inspector = new SchemaInspector(); + try { + inspector.inspect(this.schema.getURL(), rootElement); + } catch (Exception e) { + throw new IllegalStateException(e); + } + this.stylesheetBuilder.setNamespaceContext(namespaceContext); + this.stylesheetBuilder.setSchemaInspector(inspector); + } + return this.stylesheetBuilder.createTemplate(); + } + + /** + * creates a stylesheet from transformation rules; + *

don't call this method multiple times, unless transformation configuration changes, then re-init and configure transformation

+ * @return the stylesheet + */ + private Reader createStylesheet(){ + try { + Document rulesDoc = DocumentHelper.parseText(getTransformationRules()); + for(String key: this.ruleLanguageParser.getNamespaceDeclarations().keySet()){ + xslDoc.getRootElement().addNamespace(key, this.ruleLanguageParser.getNamespaceDeclarations().get(key)); + } + @SuppressWarnings("unchecked") + List nodes = rulesDoc.getRootElement().selectNodes("//xsl:template"); + + @SuppressWarnings("unchecked") + List varNodes = rulesDoc.getRootElement().selectNodes("/templateroot/xsl:param"); + for (Node node: varNodes){ + xslDoc.getRootElement().add( ((Element)node).detach() ); + } + +// xslDoc.getRootElement().add(rulesDoc.getRootElement().selectSingleNode("//xsl:param[@name='var1']").detach()); + for (Node node: nodes){ + xslDoc.getRootElement().add( ((Element)node).detach() ); // (rulesDoc.getRootElement().aget); + } + } catch (DocumentException e) { + log.error("error in creating stylesheet: " + e); + throw new IllegalStateException(e); + } + return new StringReader(xslDoc.asXML()); + } + + /** + * @param schema the schema to set + */ + public void setSchema(Resource schema) { + this.schema = schema; + } + + /** + * @return the schema + */ + public Resource getSchema() { + return schema; + } + + @Override + public Map getStaticTransformationResults() { + return this.staticResults; + } + + @Override + public Map getJobProperties() { + // TODO Auto-generated method stub + return this.jobConstantMap; + } + + @Override + public Properties getLogInformation() { + // TODO Auto-generated method stub + return null; + } + +} diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/AbstractTransformationFunction.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/AbstractTransformationFunction.java new file mode 100644 index 0000000..933307d --- /dev/null +++ 
b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/AbstractTransformationFunction.java @@ -0,0 +1,12 @@ +package eu.dnetlib.data.collective.transformation.engine.functions; + +import java.util.List; + +public abstract class AbstractTransformationFunction implements + ITransformationFunction { + + List objectRecords; + List resultRecords; + + abstract String execute() throws ProcessingException; +} diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/Convert.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/Convert.java new file mode 100644 index 0000000..65cd79c --- /dev/null +++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/Convert.java @@ -0,0 +1,74 @@ +package eu.dnetlib.data.collective.transformation.engine.functions; + +import java.util.LinkedList; +import java.util.List; + +import javax.annotation.Resource; + +import eu.dnetlib.data.collective.transformation.VocabularyRegistry; + +/** + * @author jochen + * + */ +public class Convert extends AbstractTransformationFunction { + + public static final String paramVocabularyName = "vocabularyName"; + public static final String paramFieldValue = "fieldValue"; + public static final String paramDefaultPattern = "defaultPattern"; + public static final String paramFunction = "function"; + + @Resource + private VocabularyRegistry vocabularyRegistry; + + + /** + * not implemented + * @see eu.dnetlib.data.collective.transformation.engine.functions.AbstractTransformationFunction#execute() + */ + public String execute() throws ProcessingException { + return null; + } + + /** + * extracts and returns the encoded value as used in the vocabulary + * @param vocabularyName the name of the vocabulary to be used + * @param fieldValues the list of values to normalize + * @return encoded value + * @throws ProcessingException + */ + public 
String executeSingleValue(String vocabularyName, List fieldValues)throws ProcessingException{ + if (!vocabularyRegistry.getVocabularies().containsKey(vocabularyName)){ + throw new ProcessingException("unknown vocabulary: " + vocabularyName); + } + String returnValue = vocabularyRegistry.getVocabulary(vocabularyName).encoding(fieldValues); + return returnValue; + } + + public List executeAllValues(String vocabularyName, List fieldValues) throws ProcessingException{ + if (!vocabularyRegistry.getVocabularies().containsKey(vocabularyName)){ + throw new ProcessingException("unknown vocabulary: " + vocabularyName); + } + List computedValues = new LinkedList(); + int numOfComputedValues = fieldValues.size(); + if (numOfComputedValues == 0) numOfComputedValues = 1; // return at least 1 value + String returnValue = vocabularyRegistry.getVocabulary(vocabularyName).encoding(fieldValues); + for (int i = 0; i < numOfComputedValues; i++){ + computedValues.add(returnValue); + } + return computedValues; + } + + public List executeFilterByParams(String vocabName, List fieldValues, String defaultPattern, String filterFunction) throws ProcessingException{ + return vocabularyRegistry.getVocabulary(vocabName).encoding(fieldValues, defaultPattern, filterFunction); + } + + public VocabularyRegistry getVocabularyRegistry() { + return vocabularyRegistry; + } + + public void setVocabularyRegistry(VocabularyRegistry vocabularyRegistry) { + this.vocabularyRegistry = vocabularyRegistry; + } + +} diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/DateVocabulary.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/DateVocabulary.java new file mode 100644 index 0000000..3a433cf --- /dev/null +++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/DateVocabulary.java @@ -0,0 +1,108 @@ +/** + * + */ +package 
eu.dnetlib.data.collective.transformation.engine.functions; + +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.Arrays; +import java.util.Date; +//import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +//import java.util.Map; + + + + +import org.apache.oro.text.perl.Perl5Util; + + +/** + * @author jochen + * + */ +public class DateVocabulary extends Vocabulary{ + + private static final String filterFuncMin = "min()"; + private String pattern_1 = "/^(\\d{4,4}-\\d{1,2}-\\d{1,2})/"; + private String pattern_2 = "/^(\\d{4,4}-\\d{1,2})$/"; + private String pattern_3 = "/^(\\d{4,4})$/"; + private String pattern_4 = "/^(\\d{1,2}.\\d{1,2}.\\d{4,4})$/"; + private SimpleDateFormat df; + + private transient Perl5Util perl5 = new Perl5Util(); + + public String encoding(List aKeys) throws ProcessingException{ + String tempKey_1 = null; + String tempKey_2 = null; + String tempKey_3 = null; + String currentKey = null; + String twoDigitFormat = String.format("%%0%dd", 2); + + try{ + for (String key: aKeys){ + key = key.trim(); + currentKey = key; + if (perl5.match(pattern_1, key)){ + String[] dateSplitted = perl5.getMatch().toString().split("-"); + String dateNormalized = dateSplitted[0] + "-" + String.format(twoDigitFormat, Integer.parseInt(dateSplitted[1])) + "-" + String.format(twoDigitFormat, Integer.parseInt(dateSplitted[2])); + return dateNormalized; + }else if (perl5.match(pattern_2, key)){ + String[] dateSplitted = key.split("-"); + tempKey_1 = dateSplitted[0] + "-" + String.format(twoDigitFormat, Integer.parseInt(dateSplitted[1])) + "-01"; + }else if (perl5.match(pattern_3, key)){ + tempKey_2 = key + "-01-01"; + }else if (perl5.match(pattern_4, key)){ + String[] components = key.split("[\\-\\/\\.]"); + // ignore this key if it has less than 3 components + if (components.length >= 3) + tempKey_3 = components[2] + "-" + String.format(twoDigitFormat, Integer.parseInt(components[1])) + "-" + 
String.format(twoDigitFormat, Integer.parseInt(components[0])); + } + } + }catch(Throwable e){ + throw new ProcessingException("Exception thrown in Datevocabulary (tried to match for value '" + currentKey + "'):", e); + } + if (tempKey_1 != null){ + return tempKey_1; + }else if (tempKey_2 != null){ + return tempKey_2; + }else if (tempKey_3 != null){ + return tempKey_3; + }else{ + return ""; + } + } + + @Override + public List encoding(List aKeys, String aDefaultPattern, + String aFilterFunction) throws ProcessingException { + List evList = new LinkedList(); + df = new SimpleDateFormat(aDefaultPattern); + for (String v: aKeys){ + String ev = encoding(Arrays.asList(new String[]{v})); + if (ev.length() > 0){ + try { + if (aFilterFunction.trim().length() > 0 && !evList.isEmpty()) + evList.add( filter(df.parse(ev), df.parse(evList.remove(0)), aFilterFunction) ); + else + evList.add(df.format(df.parse(ev))); + } catch (ParseException e) { + throw new ProcessingException("invalid date format: " + ev); + } + } + } + return evList; + } + + private String filter(Date d1, Date d2, String filter) throws ProcessingException{ + if (filter.equals(filterFuncMin)) + if (d1.before(d2)) + return df.format(d1); + else + return df.format(d2); + else + throw new ProcessingException("unsupported filter function: " + filter); + } + +} diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/Dblookup.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/Dblookup.java new file mode 100644 index 0000000..ea39d08 --- /dev/null +++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/Dblookup.java @@ -0,0 +1,72 @@ +/** + * + */ +package eu.dnetlib.data.collective.transformation.engine.functions; + +import java.io.StringReader; + +import javax.xml.xpath.XPath; +import javax.xml.xpath.XPathConstants; +import javax.xml.xpath.XPathExpressionException; +import 
javax.xml.xpath.XPathFactory; + +import org.w3c.dom.Node; +import org.xml.sax.InputSource; + +import eu.dnetlib.data.collective.transformation.IDatabaseConnector; +import eu.dnetlib.data.collective.transformation.TransformationException; + +/** + * @author jochen + * + */ +public class Dblookup extends AbstractTransformationFunction { + + public static final String paramSqlExpr = "sqlExpr"; + private IDatabaseConnector dbConnector; + /** + * + */ + public Dblookup() { + // TODO Auto-generated constructor stub + } + + /* (non-Javadoc) + * @see eu.dnetlib.data.collective.transformation.engine.functions.AbstractTransformationFunction#execute() + */ + @Override + String execute() throws ProcessingException { + // TODO Auto-generated method stub + return null; + } + + /** + * @return the dbConnector + */ + public IDatabaseConnector getDbConnector() { + return dbConnector; + } + + /** + * @param dbConnector the dbConnector to set + */ + public void setDbConnector(IDatabaseConnector dbConnector) { + this.dbConnector = dbConnector; + } + + public LookupRecord getResults(String aSqlExpression) throws TransformationException, XPathExpressionException { + LookupRecord lookupRecord = new LookupRecord(); + XPath xpath = XPathFactory.newInstance().newXPath(); + + for (String record: dbConnector.getResult(aSqlExpression)){ + InputSource inSource = new InputSource(new StringReader(record)); + Node root = (Node)xpath.evaluate("/", inSource, XPathConstants.NODE); + lookupRecord.setRecord(xpath.evaluate("//FIELD[@name='accessinfopackage']/text()", root), + "officialname", xpath.evaluate("//FIELD[@name='officialname']/text()", root)); + lookupRecord.setRecord(xpath.evaluate("//FIELD[@name='accessinfopackage']/text()", root), + "id", xpath.evaluate("//FIELD[@name='id']/text()", root)); + } + return lookupRecord; + } + +} diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/Extract.java 
b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/Extract.java new file mode 100644 index 0000000..69a8fec --- /dev/null +++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/Extract.java @@ -0,0 +1,50 @@ +/** + * + */ +package eu.dnetlib.data.collective.transformation.engine.functions; + +import java.util.List; + +import eu.dnetlib.data.collective.transformation.TransformationException; + +/** + * @author jochen + * + */ +public class Extract extends AbstractTransformationFunction { + + public static final String paramNameFeature = "feature"; + private IFeatureExtraction featureExtraction; + + /* (non-Javadoc) + * @see eu.dnetlib.data.collective.transformation.engine.functions.AbstractTransformationFunction#execute() + */ + @Override + String execute() throws ProcessingException { + // TODO Auto-generated method stub + return null; + } + + public List execute(List aObjectRecords, String aFeature) throws ProcessingException{ + try { + return featureExtraction.execute(aObjectRecords, aFeature); + } catch (TransformationException e) { + throw new ProcessingException(e); + } + } + + /** + * @param featureExtraction the featureExtraction to set + */ + public void setFeatureExtraction(IFeatureExtraction featureExtraction) { + this.featureExtraction = featureExtraction; + } + + /** + * @return the featureExtraction + */ + public IFeatureExtraction getFeatureExtraction() { + return featureExtraction; + } + +} diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/IFeatureExtraction.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/IFeatureExtraction.java new file mode 100644 index 0000000..127d54f --- /dev/null +++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/IFeatureExtraction.java @@ -0,0 +1,25 @@ +/** + * + */ +package 
/**
 * Contract for components that extract a named feature from a list of object records.
 *
 * @author jochen
 *
 */
public interface IFeatureExtraction {

	/**
	 * applies the extraction of a feature on objectRecords
	 * @param aObjectRecords the object records to extract the feature from
	 * @param aFeatureName the name of the feature to extract
	 * @return list of extracted results
	 * @throws TransformationException if the extraction fails
	 */
	public List execute(List aObjectRecords, String aFeatureName) throws TransformationException;
}
default pattern and applying a filter function + * @param aKeys + * @param aDefaultPattern + * @param aFilterFunction + * @return the list of encoded values + * @throws ProcessingException + */ + public List encoding(List aKeys, String aDefaultPattern, String aFilterFunction) throws ProcessingException; +} diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/IdentifierExtract.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/IdentifierExtract.java new file mode 100644 index 0000000..7891e4b --- /dev/null +++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/IdentifierExtract.java @@ -0,0 +1,114 @@ +package eu.dnetlib.data.collective.transformation.engine.functions; + +import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Set; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.xpath.XPath; +import javax.xml.xpath.XPathConstants; +import javax.xml.xpath.XPathExpressionException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.w3c.dom.Document; +import org.w3c.dom.DocumentFragment; +import org.w3c.dom.Element; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; + +public class IdentifierExtract extends AbstractTransformationFunction{ + public static final Log log = LogFactory.getLog(IdentifierExtract.class); + public static final String paramXpathExprJson = "xpathExprJson"; + public static final String paramXpathExprInSource = "xpathExprInputSource"; + public static final String paramRegExpr = "regExpr"; + + @Override + String execute() throws ProcessingException { + // TODO Auto-generated method stub + return null; + } + + /** + * extract content matched by a regular expression pattern from a given node and return matched content as a 
node-list + * @param aXpathExprList + * @param aInput + * @param aRegExpression + * @param aDocument + * @param aXpath + * @return nodeList + * @throws ProcessingException + */ + public NodeList extract(List aXpathExprList, Node aInput, + String aRegExpression, Document aDocument, XPath aXpath) throws ProcessingException { + + log.debug("xpathExprList: " + aXpathExprList); + log.debug("regExpr: " + aRegExpression); + Set identifierSet = new HashSet(); + +// log.debug("node: length: " + aInput.getChildNodes().getLength()); + log.debug("regular expression : " + aRegExpression); + Pattern p = Pattern.compile(aRegExpression); + try { + List textList = extractText(aXpathExprList, aInput, aXpath); + for (String text: textList){ + log.debug("text as input : " + text); + Matcher m = p.matcher(text); + while (m.find()){ + log.debug("extracted identifier: " + m.group()); + identifierSet.add(m.group()); + } + } + return toNodeList(identifierSet, aDocument); + } catch (XPathExpressionException e) { + e.printStackTrace(); + throw new ProcessingException(e); + } catch (ParserConfigurationException e) { + e.printStackTrace(); + throw new ProcessingException(e); + } + } + + /** + * create a list of nodes from a list of string values + * @param aValueSet, set of unique values + * @param aDocument + * @return nodeList + */ + private NodeList toNodeList(Set aValueSet, Document aDocument){ + DocumentFragment dFrag = aDocument.createDocumentFragment(); + Element root = aDocument.createElement("root"); + dFrag.appendChild(root); + for (String value: aValueSet){ + Element eVal = aDocument.createElement("value"); + eVal.setTextContent(value); + root.appendChild(eVal); + } + return dFrag.getChildNodes(); + } + + /** + * extract text from a given node using a list of given xpath expressions + * @param aXpathExprList + * @param aInput + * @param aXpath + * @return list of strings + * @throws XPathExpressionException + * @throws ParserConfigurationException + */ + private List 
extractText(List aXpathExprList, Node aInput, XPath aXpath) throws XPathExpressionException, ParserConfigurationException{ + + List resultList = new LinkedList(); + for (String xpathExpr: aXpathExprList){ + NodeList nodeList = (NodeList)aXpath.evaluate(xpathExpr, aInput, XPathConstants.NODESET); + log.debug("extract text: nodelist length: " + nodeList.getLength()); + for (int i = 0; i < nodeList.getLength(); i++){ + resultList.add(nodeList.item(i).getTextContent()); + } + } + return resultList; + } +} diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/Lookup.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/Lookup.java new file mode 100644 index 0000000..c9b02d7 --- /dev/null +++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/Lookup.java @@ -0,0 +1,34 @@ +/** + * + */ +package eu.dnetlib.data.collective.transformation.engine.functions; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * @author jochen + * + */ +public class Lookup extends AbstractTransformationFunction { + public static final Log log = LogFactory.getLog(Lookup.class); + public static final String paramExprIdentifier = "exprIdentifier"; + public static final String paramExprProperty = "exprProperty"; + + /** + * + */ + public Lookup() { + // TODO Auto-generated constructor stub + } + + /* (non-Javadoc) + * @see eu.dnetlib.data.collective.transformation.engine.functions.AbstractTransformationFunction#execute() + */ + @Override + String execute() throws ProcessingException { + // TODO Auto-generated method stub + return null; + } + +} diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/LookupRecord.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/LookupRecord.java new file mode 100644 index 
/**
 * Insertion-ordered collection of lookup records; every record is a set of
 * string properties addressed by a record key.
 *
 * @author jochen
 */
public class LookupRecord {

    // Typed map: with a raw HashMap the chained get(...).setProperty(...) calls do not type-check.
    private final HashMap<String, Properties> recordMap = new LinkedHashMap<String, Properties>();

    /**
     * Sets a property of a record; the record is created when the key is new.
     *
     * @param aRecordKey key of the record
     * @param aPropertyKey name of the property
     * @param aPropertyValue value of the property
     */
    public void setRecord(String aRecordKey, String aPropertyKey, String aPropertyValue){
        Properties properties = recordMap.get(aRecordKey);
        if (properties != null){
            properties.setProperty(aPropertyKey, aPropertyValue);
            return;
        }
        // populate first, register afterwards: a rejected value leaves no empty record behind
        properties = new Properties();
        properties.setProperty(aPropertyKey, aPropertyValue);
        recordMap.put(aRecordKey, properties);
    }

    /**
     * Looks up a property of a record.
     *
     * @param aRecordKey key of the record
     * @param aPropertyKey name of the property
     * @return the property value, or {@code "UNKNOWN"} when the record or the property does not exist
     */
    public String getPropertyValue(String aRecordKey, String aPropertyKey){
        Properties properties = recordMap.get(aRecordKey);
        if (properties == null) return "UNKNOWN";
        return properties.getProperty(aPropertyKey, "UNKNOWN");
    }

}
/**
 * Checked exception signalling that a transformation function could not process its input.
 *
 * @author jochen
 */
public class ProcessingException extends Exception {

    private static final long serialVersionUID = -8648116731979859467L;

    /** Creates an exception without message and without cause. */
    public ProcessingException() {
    }

    /**
     * @param message description of the failure
     */
    public ProcessingException(String message) {
        super(message);
    }

    /**
     * @param cause the underlying failure
     */
    public ProcessingException(Throwable cause) {
        super(cause);
    }

    /**
     * @param message description of the failure
     * @param cause the underlying failure
     */
    public ProcessingException(String message, Throwable cause) {
        super(message, cause);
    }

}
static final String paramRegularExpr = "regularExpression"; + public static final String paramExpr1 = "expr1"; + public static final String paramExpr2 = "expr2"; + + private Perl5Util util = new Perl5Util(); + + /* (non-Javadoc) + * @see eu.dnetlib.data.collective.transformation.engine.functions.AbstractTransformationFunction#execute() + */ + @Override + String execute() throws ProcessingException { + // TODO Auto-generated method stub + return null; + } + + public String executeSingleValue(String aRegularExpression, String aExpr1, String aExpr2) throws ProcessingException{ + String result = ""; + if (aRegularExpression.startsWith("m/")){ + if (util.match(aRegularExpression, aExpr1)) + result = util.group(1); + }else if (!aRegularExpression.startsWith("s/")){ + // assume match and extract + // throw new ProcessingException("unsupported or invalid regular expression: " + aRegularExpression); + if (util.match(aRegularExpression, aExpr1)){ + String funder = util.group(1).toLowerCase(); + String projectId = util.group(3); + result = funder + "_" + projectId; + } + }else{ + try{ + result = util.substitute(aRegularExpression, aExpr1); + }catch(MalformedPerl5PatternException patternExc){ + log.fatal("aRegularExpression: " + aRegularExpression); + log.fatal("aExpr1: " + aExpr1); + log.fatal(patternExc.getMessage()); + throw new ProcessingException(patternExc); + } + } + return result; + } + + +} diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/RetrieveValue.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/RetrieveValue.java new file mode 100644 index 0000000..a22c5ed --- /dev/null +++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/RetrieveValue.java @@ -0,0 +1,157 @@ +/** + * + */ +package eu.dnetlib.data.collective.transformation.engine.functions; + +import java.io.StringReader; +import java.text.SimpleDateFormat; +import 
java.util.Date;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.TimeZone;
+
+import javax.xml.namespace.NamespaceContext;
+import javax.xml.xpath.XPath;
+import javax.xml.xpath.XPathExpressionException;
+import javax.xml.xpath.XPathFactory;
+
+import org.apache.commons.lang3.StringEscapeUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.xml.sax.InputSource;
+
+import eu.dnetlib.common.profile.Resource;
+import eu.dnetlib.common.profile.ResourceDao;
+import eu.dnetlib.data.collective.transformation.rulelanguage.Argument;
+
+/**
+ * Transformation function that reads a value from a resource profile or answers the current date.
+ * @author jochen
+ */
+public class RetrieveValue extends AbstractTransformationFunction {
+
+    public static final Log log = LogFactory.getLog(RetrieveValue.class);
+    public static final String paramFunctionName = "functionName";
+    public static final String paramFunctionProfileId = "functionParameterProfileId";
+    public static final String paramFunctionExpr = "functionParameterExpr";
+
+    public enum FUNCTION {PROFILEFIELD, CURRENTDATE};
+
+    @javax.annotation.Resource
+    private ResourceDao resourceDao;
+
+    /* (non-Javadoc)
+     * @see eu.dnetlib.data.collective.transformation.engine.functions.AbstractTransformationFunction#execute()
+     */
+    @Override
+    String execute() throws ProcessingException {
+        // no generic execution implemented, see executeSingleValue
+        return null;
+    }
+
+    /**
+     * Evaluates PROFILEFIELD (arguments: profile id or xquery, then an xpath into the resolved resource) or CURRENTDATE.
+     * @return the value read from the resource, or the current date and time in UTC
+     * @throws ProcessingException on unknown function names, wrong argument counts or unresolvable resources
+     */
+    public String executeSingleValue(String functionName, List<Argument> arguments, String objRecord, Map<String, String> namespaceMap) throws ProcessingException{
+        String result = "";
+        FUNCTION function;
+        try{
+            function = FUNCTION.valueOf(functionName);
+        }catch(IllegalArgumentException e){
+            throw new ProcessingException("unsupported function: " + functionName, e);
+        }
+        switch(function){
+        case PROFILEFIELD:
+            if (arguments.size() != 2){
+                throw new ProcessingException("invalid number of arguments - required 2 but found :" + arguments.size());
+            }
+            String arg = "";
+            Resource resource = null;
+            try{
+                if (arguments.get(0).isValue()){
+                    arg = arguments.get(0).getArgument();
+                    log.debug("retrieve value arg isValue: " + arg);
+                    if (arg.startsWith("collection(")) { // xquery
+                        arg = StringEscapeUtils.unescapeXml(arg);
+                        resource = resourceDao.getResourceByQuery(arg); // query
+                    }else
+                        resource = resourceDao.getResource(arg); // profile id
+                }else if (arguments.get(0).isInputField()){
+                    arg = evaluateXpath(objRecord, arguments.get(0).getArgument(), namespaceMap);
+                    log.debug("retrieve value arg isInputField: " + arg);
+                    if (arg.startsWith("collection(")) { // xquery
+                        arg = StringEscapeUtils.unescapeXml(arg);
+                        resource = resourceDao.getResourceByQuery(arg); // query
+                    }else
+                        resource = resourceDao.getResource(arg); // profile id
+                }else if (arguments.get(0).isJobConst()){
+                    // TODO
+                }else if (arguments.get(0).isVariable()){
+                    // TODO
+                    log.warn("RETRIEVEVALUE: support for variables not yet implemented.");
+                }
+            }catch(Exception e){
+                throw new ProcessingException(e);
+            }
+            if (resource == null){
+                throw new ProcessingException("invalid profileId: " + arg + "; functionName: " + functionName + ", arg1: " + arguments.get(0).getArgument() + ", arg2: " + arguments.get(1).getArgument());
+            }
+            result = resource.getValue(arguments.get(1).getArgument()); // xpath expr
+            break;
+        case CURRENTDATE:
+            SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'"); // TODO format string
+            dateFormat.setTimeZone(TimeZone.getTimeZone("UTC")); // 'Z' is a quoted literal, so the time itself must be UTC
+            result = dateFormat.format(new Date());
+            break;
+        default: // unsupported
+            break;
+        }
+        return result;
+    }
+
+    /**
+     * @return the resourceDao
+     */
+    public ResourceDao getResourceDao() {
+        return resourceDao;
+    }
+
+    /**
+     * @param resourceDao the resourceDao to set
+     */
+    public void setResourceDao(ResourceDao resourceDao) {
+        this.resourceDao = resourceDao;
+    }
+
+    private String evaluateXpath(String record, String xpathExpr, Map<String, String> nsMap){
+        XPath xpath = XPathFactory.newInstance().newXPath();
+        xpath.setNamespaceContext(new NamespaceContext() {
+            @Override
+            public Iterator<String> getPrefixes(String namespaceURI) {
+                return null;
+            }
+            @Override
+            public String getPrefix(String namespaceURI) {
+                return null;
+            }
+            @Override
+            public String getNamespaceURI(String prefix) {
+                if ("dri".equals(prefix)) return "http://www.driver-repository.eu/namespace/dri";
+                if ("dr".equals(prefix)) return "http://www.driver-repository.eu/namespace/dr";
+                if ("dc".equals(prefix)) return "http://purl.org/dc/elements/1.1/";
+                if ("oaf".equals(prefix)) return "http://namespace.openaire.eu/oaf";
+                if ("prov".equals(prefix)) return "http://www.openarchives.org/OAI/2.0/provenance";
+                return "";
+            }
+        });
+        try {
+            return xpath.evaluate(xpathExpr, new InputSource(new StringReader(record)));
+        } catch (XPathExpressionException e) {
+            log.fatal("cannot evaluate xpath");
+            throw new IllegalStateException(e);
+        }
+    }
+}
diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/Split.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/Split.java
new file mode 100644
index 0000000..7f79aa4
--- /dev/null
+++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/Split.java
@@ -0,0 +1,103 @@
+/**
+ *
+ */
+package eu.dnetlib.data.collective.transformation.engine.functions;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Queue;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.springframework.util.StringUtils;
+
+/**
+ * Transformation function that splits values into tokens, either all at once or one token per call.
+ * @author js
+ */
+public class Split extends AbstractTransformationFunction {
+
+    public static final Log log = LogFactory.getLog(Split.class);
+    public static final String paramInputExpr = "inputExpr";
+    public static final String paramRegExpr = "regExpr";
+    public static final String paramElementName = "elementName";
+
+    private Map<String, Queue<String>> queueMap = new HashMap<String, Queue<String>>();
+
+    /* (non-Javadoc)
+     * @see eu.dnetlib.data.collective.transformation.engine.functions.AbstractTransformationFunction#execute()
+     */
+    @Override
+    String execute() throws ProcessingException {
+        // no generic execution implemented, see executeAllValues and executeSingleValue
+        return null;
+    }
+
+    /**
+     * Splits each of the given values into trimmed, non-empty tokens.
+     * @param aInputValues the values to split
+     * @param aRegExpr the delimiter characters as understood by StringUtils.tokenizeToStringArray (not a regular expression)
+     * @return the tokens of all values in input order
+     */
+    public Collection<String> executeAllValues(List<String> aInputValues, String aRegExpr) throws ProcessingException{
+        Collection<String> result = new LinkedList<String>();
+        for (String value: aInputValues){
+            result.addAll(tokenize(value, aRegExpr));
+        }
+        return result;
+    }
+
+    /**
+     * Answers the tokens of the given values one by one: the first call for a call id tokenizes
+     * the values into a queue, every call then removes and returns the head of that queue.
+     * @param aInputValues the values to split, only evaluated if no queue exists for the call id
+     * @param aRegExpr the delimiter characters, see executeAllValues
+     * @param aCallId identifies the queue
+     * @return the next token or null if the queue is exhausted; the queue is dropped in that case
+     * @throws ProcessingException
+     */
+    public String executeSingleValue(List<String> aInputValues, String aRegExpr, String aCallId) throws ProcessingException{
+        if (!queueMap.containsKey(aCallId)){
+            Queue<String> queue = new LinkedList<String>();
+            queueMap.put(aCallId, queue);
+            for (String value: aInputValues){
+                queue.addAll(tokenize(value, aRegExpr));
+            }
+        }
+        return pollQueue(aCallId);
+    }
+
+    /**
+     * Answers the next token of the queue created by an earlier call with the same call id.
+     * @param aCallId identifies the queue
+     * @return the next token or null if the queue is exhausted or does not exist
+     * @throws ProcessingException
+     */
+    public String executeSingleValue(String aCallId) throws ProcessingException{
+        return pollQueue(aCallId);
+    }
+
+    private List<String> tokenize(String aValue, String aDelimiters){
+        String[] tokens = StringUtils.tokenizeToStringArray(aValue, aDelimiters, true, true);
+        // a null value may be answered with null instead of an empty array
+        if (tokens == null){
+            return Collections.emptyList();
+        }
+        return Arrays.asList(tokens);
+    }
+
+    private String pollQueue(String aCallId){
+        Queue<String> queue = queueMap.get(aCallId);
+        // an unknown or already dropped call id yields null rather than a NullPointerException
+        String result = (queue == null) ? null : queue.poll();
+        if (result == null){
+            queueMap.remove(aCallId);
+        }
+        return result;
+    }
+}
diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/Vocabulary.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/Vocabulary.java
new file mode 100644
index 0000000..7cc9927
--- /dev/null
+++
b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/engine/functions/Vocabulary.java
@@ -0,0 +1,219 @@
+/**
+ *
+ */
+package eu.dnetlib.data.collective.transformation.engine.functions;
+
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+
+import org.dom4j.Node;
+
+import eu.dnetlib.common.profile.Resource;
+import eu.dnetlib.common.utils.XMLUtils;
+
+/**
+ * Vocabulary that normalizes terms: names, codes and synonyms are mapped to the code of their term.
+ * @author jochen
+ */
+public class Vocabulary implements IVocabulary{
+
+    private List<Term> terms;
+    private Map<String, String> encodingMap;
+    private Resource resource;
+    private boolean isCaseSensitive = true;
+    private String delimiter = null;
+    private String name = null;
+
+    /**
+     * @return the terms
+     */
+    public List<Term> getTerms() {
+        return terms;
+    }
+
+    /**
+     * @param terms the terms to set
+     */
+    public void setTerms(List<Term> terms) {
+        this.terms = terms;
+    }
+
+    /**
+     * @return the name
+     */
+    public String getName() {
+        return name;
+    }
+
+    /**
+     * @param name the name to set
+     */
+    public void setName(String name) {
+        this.name = name;
+    }
+
+    public String getVocabularyName(){
+        return resource.getValue("//VOCABULARY_NAME");
+    }
+
+    /**
+     * returns the normalized, encoded String for the first given key that is known, otherwise the code registered
+     * for "Unknown" or - if there is none - for "Undetermined" (both in lower case if the vocabulary ignores case)
+     * @param keys a list of Strings to encode, null entries are skipped
+     * @return a normalized, encoded String
+     */
+    @Override
+    public String encoding(List<String> keys)throws ProcessingException{
+        // take the first best
+        for (String key: keys){
+            if (key == null)
+                continue;
+            key = key.trim();
+            if (!isCaseSensitive)
+                key = key.toLowerCase();
+            if (encodingMap.containsKey(key))
+                return encodingMap.get(key);
+        }
+        if (encodingMap.containsKey("Unknown") || encodingMap.containsKey("unknown")){
+            if (isCaseSensitive) return encodingMap.get("Unknown");
+            else return encodingMap.get("unknown");
+        }else{
+            if (isCaseSensitive) return encodingMap.get("Undetermined");
+            else return encodingMap.get("undetermined");
+        }
+    }
+
+    class Term{
+        String code;
+        String name;
+        List<String> synonyms = new LinkedList<String>();
+
+        void addSynonym(String synonym){
+            synonyms.add(synonym);
+        }
+
+        List<String> getSynonyms(){
+            return synonyms;
+        }
+    }
+
+    /**
+     * init the encoding with the given list of term parameters
+     * @param aTermList list of parameters with expected key:value pairs 'name':string, 'code':string, 'synonyms':list (optional)
+     */
+    @SuppressWarnings("unchecked")
+    public void setResource(List<Map<String, ?>> aTermList){
+        terms = new LinkedList<Term>();
+        for (Map<String, ?> termMap : aTermList){
+            Term t = new Term();
+            terms.add(t);
+            t.name = (String)termMap.get("name");
+            t.code = (String)termMap.get("code");
+            List<String> synonyms = (List<String>)termMap.get("synonyms");
+            if (synonyms == null)
+                continue; // a term without synonyms is valid
+            for (String synonym: synonyms)
+                t.addSynonym(synonym);
+        }
+        setCode();
+    }
+
+    /**
+     * init the encoding with term parameters from a vocabulary resource profile
+     * @param resource the vocabulary profile, its terms are read from //TERMS
+     */
+    public void setResource(Resource resource) {
+        this.resource = resource;
+        terms = new LinkedList<Term>();
+        List<Node> nodes = resource.getNodeList("//TERMS/*");
+        int index = 1;
+        for (Node n: nodes){
+            Term t = new Term();
+            terms.add(t);
+            try {
+                t.name = XMLUtils.getNode(n, "//TERM[" + index + "]/@english_name").getText();
+                t.code = XMLUtils.getNode(n, "//TERM[" + index + "]/@code").getText();
+                List<Node> nsynonyms = XMLUtils.getNodes(n, "//TERM[" + index + "]/SYNONYMS/*");
+                int indexSynonyms = 1;
+                for (Node nsynonym: nsynonyms){
+                    String synonymTerm = XMLUtils.getNode(nsynonym, "//TERM[" + index + "]//SYNONYM[" + indexSynonyms + "]/@term").getText();
+                    t.addSynonym(synonymTerm);
+                    indexSynonyms++;
+                }
+            } catch (Exception e) {
+                throw new IllegalStateException(e);
+            }
+            index++;
+        }
+        setCode();
+    }
+
+    /** (re)builds the lookup table from the current terms, case sensitivity and delimiter */
+    private void setCode(){
+        encodingMap = new TreeMap<String, String>();
+        for (Term t: terms){
+            if (isCaseSensitive){
+                encodingMap.put(t.name, t.code);
+                encodingMap.put(t.code, t.code);
+            }else{
+                encodingMap.put(t.name.toLowerCase(), t.code);
+                encodingMap.put(t.code.toLowerCase(), t.code);
+            }
+            if (this.delimiter != null){
+                String[] splittedEncodings = t.code.split(this.delimiter);
+                for (String encoding: splittedEncodings){
+                    if (isCaseSensitive){
+                        encodingMap.put(encoding, t.code);
+                    }else{
+                        encodingMap.put(encoding.toLowerCase(), t.code);
+                    }
+                }
+            }
+
+            for (String synonym : t.synonyms){
+                if (isCaseSensitive) encodingMap.put(synonym, t.code);
+                else encodingMap.put(synonym.toLowerCase(), t.code);
+            }
+        }
+    }
+
+    public Resource getResource() {
+        return resource;
+    }
+
+    /**
+     * @param isCaseSensitive false to look up keys ignoring case; the lookup table is rebuilt if terms are already loaded
+     */
+    public void setCaseSensitive(boolean isCaseSensitive) {
+        this.isCaseSensitive = isCaseSensitive;
+        if (terms != null) setCode();
+    }
+
+    public boolean isCaseSensitive() {
+        return isCaseSensitive;
+    }
+
+    /**
+     * @param delimiter the delimiter (a regular expression) to set; the lookup table is rebuilt if terms are already loaded
+     */
+    public void setDelimiter(String delimiter) {
+        this.delimiter = delimiter;
+        if (terms != null) setCode();
+    }
+
+    /**
+     * @return the delimiter
+     */
+    public String getDelimiter() {
+        return delimiter;
+    }
+
+    @Override
+    public List<String> encoding(List<String> aKeys, String aDefaultPattern,
+            String aFilterFunction) throws ProcessingException {
+        throw new ProcessingException("no implementation of filtered encoding.");
+    }
+
+}
diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/Argument.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/Argument.java
new file mode 100644
index 0000000..2348b12
--- /dev/null
+++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/Argument.java
@@ -0,0 +1,38 @@
+package eu.dnetlib.data.collective.transformation.rulelanguage;
+
+/**
+ * A typed argument: a value, an input field, a job constant or a variable.
+ * @author jochen
+ */
+public class Argument {
+
+    public enum Type {VALUE, INPUTFIELD, JOBCONST, VAR};
+
+    private Type type;
+    private String argument;
+
+    public Argument(Type aType, String aArgument) {
+        this.type = aType;
+        this.argument = aArgument;
+    }
+
+    public boolean isValue(){
+        return this.type == Type.VALUE;
+    }
+
+    public boolean isInputField(){
+        return this.type == Type.INPUTFIELD;
+    }
+
+    public boolean isJobConst(){
+        return this.type == Type.JOBCONST;
+    }
+
+    public boolean isVariable(){
+        return this.type == Type.VAR;
+    }
+
+    public String getArgument(){
+        return this.argument;
+    }
+}
diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/Condition.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/Condition.java
new file mode 100644
index 0000000..f19bbaf
--- /dev/null
+++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/Condition.java
@@ -0,0 +1,76 @@
+package eu.dnetlib.data.collective.transformation.rulelanguage;
+
+/**
+ * Condition of a rule: an apply and a condition expression plus a primary and a secondary rule.
+ * @author jochen
+ */
+public class Condition {
+
+    private String applyExpression;
+    private String conditionExpression;
+    private Rules primaryRule;
+    private Rules secondaryRule;
+
+    /**
+     * @param applyExpression the applyExpression to set
+     */
+    public void setApplyExpression(String applyExpression) {
+        this.applyExpression = applyExpression;
+    }
+
+    /**
+     * @return the applyExpression
+     */
+    public String getApplyExpression() {
+        return applyExpression;
+    }
+
+    /**
+     * @param conditionExpression the conditionExpression to set
+     */
+    public void setConditionExpression(String conditionExpression) {
+        this.conditionExpression = conditionExpression;
+    }
+
+    /**
+     * @return the conditionExpression
+     */
+    public String getConditionExpression() {
+        return conditionExpression;
+    }
+
+    /** @return true if the given rule equals the primary rule, false otherwise and for null */
+    public boolean isPrimary(Rules aRule){
+        return aRule != null && aRule.equals(primaryRule);
+    }
+
+    /**
+     * @param primaryRule the primaryRule to set
+     */
+    public void setPrimaryRule(Rules primaryRule) {
+        this.primaryRule = primaryRule;
+    }
+
+    /**
+     * @return the primaryRule
+     */
+    public Rules getPrimaryRule() {
+        return primaryRule;
+    }
+
+    /**
+     * @param secondaryRule the secondaryRule to set
+     */
+    public void setSecondaryRule(Rules secondaryRule) {
+        this.secondaryRule = secondaryRule;
+    }
+
+    /**
+     * @return the secondaryRule
+     */
+    public Rules getSecondaryRule() {
+        return secondaryRule;
+    }
+
+
+}
diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/IRule.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/IRule.java
new file mode 100644
index 0000000..4847b57
--- /dev/null
+++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/IRule.java
@@ -0,0 +1,27 @@
+package eu.dnetlib.data.collective.transformation.rulelanguage;
+
+/**
+ * Common view on a rule: its unique name and what it defines.
+ * @author jochen
+ */
+public interface IRule {
+
+    public String getUniqueName();
+
+    public boolean hasCondition();
+
+    /**
+     * returns true when the rule has pending rules that set an element
+     * @return true if it has a ruleSet, false otherwise
+     */
+    public boolean hasSet();
+
+    public boolean definesVariable();
+
+    public boolean definesTargetField();
+
+    public boolean definesTemplate();
+
+    public boolean definesTemplateMatch();
+
+}
diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/RuleLanguageParser.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/RuleLanguageParser.java
new file mode 100644
index 0000000..04d255f
--- /dev/null
+++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/RuleLanguageParser.java
@@ -0,0 +1,129 @@
+package eu.dnetlib.data.collective.transformation.rulelanguage;
+
+import java.io.InputStream;
+import java.io.Reader;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import eu.dnetlib.data.collective.transformation.rulelanguage.parser.ASTStart;
+import eu.dnetlib.data.collective.transformation.rulelanguage.parser.FtScript;
+import
eu.dnetlib.data.collective.transformation.rulelanguage.parser.ParseException;
+import eu.dnetlib.data.collective.transformation.rulelanguage.parser.SimpleNode;
+import eu.dnetlib.data.collective.transformation.rulelanguage.parser.ASTMyScript.SCRIPTTYPE;
+import eu.dnetlib.data.collective.transformation.rulelanguage.util.FunctionCall;
+import eu.dnetlib.data.collective.transformation.rulelanguage.visitor.AbstractVisitor;
+import eu.dnetlib.data.collective.transformation.rulelanguage.visitor.RuleLanguageVisitor;
+
+/**
+ * Parses rule scripts and collects what they declare in a {@link RuleLanguageVisitor}.
+ * The {@link FtScript} parser instance is held in a static field and thus shared by all instances.
+ * @author jochen
+ */
+public class RuleLanguageParser {
+
+    private static final Log log = LogFactory.getLog(RuleLanguageParser.class);
+    private RuleLanguageVisitor visitor = new RuleLanguageVisitor();
+    protected static FtScript scriptParser = null;
+    private String xslStylesheet = null;
+
+    /** parses the script provided by the given stream and visits the resulting tree */
+    public void parse(InputStream inStream){
+        if (scriptParser == null) scriptParser = new FtScript(inStream);
+        scriptParser.ReInit(inStream);
+        parsingAndTraversing();
+    }
+
+    /** parses the script provided by the given reader and visits the resulting tree */
+    public void parse(Reader inRead){
+        if (scriptParser == null) scriptParser = new FtScript(inRead);
+        scriptParser.ReInit(inRead);
+        parsingAndTraversing();
+    }
+
+    private void parsingAndTraversing(){
+        try {
+            ASTStart root = scriptParser.Start();
+            traverseTree(root, visitor);
+        } catch (ParseException e) {
+            log.error(e);
+            throw new IllegalStateException(e);
+        }
+    }
+
+    public String getScriptName(){
+        return visitor.getScriptName();
+    }
+
+    public SCRIPTTYPE getScriptType(){
+        return visitor.getScriptType();
+    }
+    // NOTE(review): the type arguments of the getters below were lost in extraction and are inferred - confirm against RuleLanguageVisitor
+    public Map<String, Set<IRule>> getElementMappingRules(){
+        return visitor.getElementMappingRules();
+    }
+
+    public Map<String, IRule> getVariableMappingRules(){
+        return visitor.getVariableMappingRules();
+    }
+
+    public Map<String, IRule> getTemplateMappingRules(){
+        return visitor.getTemplateMappingRules();
+    }
+
+    public List<String> getImportedScripts(){
+        return visitor.getImportedScripts();
+    }
+
+    public Map<String, String> getNamespaceDeclarations(){
+        return visitor.getNamespaceDeclarations();
+    }
+
+    public List<FunctionCall> getFunctionCalls(){
+        return visitor.getFunctionCalls();
+    }
+
+    public List<Map<String, String>> getPreprocessings(){
+        return visitor.getPreprocessings();
+    }
+
+    public String getXslStylesheet() {
+        return xslStylesheet;
+    }
+
+    public void setXslStylesheet(String xslStylesheet) {
+        this.xslStylesheet = xslStylesheet;
+    }
+
+    public boolean isXslStylesheet(){
+        return xslStylesheet != null;
+    }
+
+    /**
+     * merges the function calls, mapping rules and name-space declarations of another parser, e.g. the parser of an imported script, into this parser
+     * @param aParser the parser whose results are added
+     */
+    public void addRulesFromParser(RuleLanguageParser aParser){
+        this.visitor.getFunctionCalls().addAll(aParser.getFunctionCalls());
+        this.visitor.getElementMappingRules().putAll(aParser.getVisitor().getElementMappingRules());
+        this.visitor.getVariableMappingRules().putAll(aParser.getVisitor().getVariableMappingRules());
+        this.visitor.getTemplateMappingRules().putAll(aParser.getVisitor().getTemplateMappingRules());
+        this.visitor.getNamespaceDeclarations().putAll(aParser.getNamespaceDeclarations());
+    }
+
+    protected RuleLanguageVisitor getVisitor(){
+        return visitor;
+    }
+
+    private void traverseTree(SimpleNode node, AbstractVisitor visitor){
+        for (int childIdx = 0; childIdx < node.jjtGetNumChildren(); childIdx++){
+            SimpleNode child = (SimpleNode)node.jjtGetChild(childIdx);
+            child.jjtAccept(visitor, null);
+            traverseTree(child, visitor);
+        }
+    }
+
+}
diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/Rules.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/Rules.java
new file mode 100644
index 0000000..d997932
--- /dev/null
+++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/Rules.java
@@ -0,0 +1,316 @@
+package eu.dnetlib.data.collective.transformation.rulelanguage;
+
+import java.util.Properties;
+
+import eu.dnetlib.data.collective.transformation.core.schema.SchemaElement;
+import
eu.dnetlib.data.collective.transformation.rulelanguage.util.FunctionCall;
+
+/**
+ * Value object describing one rule of a transformation rule script.
+ * @author jochen
+ */
+public class Rules implements Comparable<Rules>, IRule{
+
+    public static final String staticRule = "static";
+    private String attribute = "";
+    private String targetField = "";
+    private String ruleDeclaration = "";
+    private String xpath = "";
+    private String constant = "";
+    private String namespace = "";
+    private String variable = "";
+    private String template = "";
+    private String templateMatch = "";
+    private FunctionCall funcCall;
+    private Condition condition;
+    private boolean isEmpty = false;
+    private boolean isSkip = false;
+    private SchemaElement targetSchemaElement;
+    private String assignmentVariable = "";
+    private RulesSet rulesSet;
+    private Properties properties = new Properties();
+
+    public Rules() {
+    }
+
+    /**
+     * indicates if the rule is declared as static
+     * @return true if static, false otherwise
+     */
+    public boolean isStatic(){
+        return ruleDeclaration.equals(staticRule);
+    }
+
+    /**
+     * indicates if the rule defines a variable
+     * @return true if variable is defined, false otherwise
+     * @see eu.dnetlib.data.collective.transformation.rulelanguage.IRule#definesVariable()
+     */
+    public boolean definesVariable(){
+        return variable.length() > 0;
+    }
+
+    /** @return true if a target field is defined, false otherwise */
+    public boolean definesTargetField(){
+        return targetField.length() > 0;
+    }
+
+    /**
+     * checks if this rule defines an attribute
+     * @return true if defines attribute else false
+     */
+    public boolean definesAttribute(){
+        return attribute.length() > 0;
+    }
+
+    /** @return true if a template is defined, false otherwise */
+    public boolean definesTemplate(){
+        return template.length() > 0;
+    }
+
+    /** @return true if a template match is defined, false otherwise */
+    @Override
+    public boolean definesTemplateMatch() {
+        return templateMatch.length() > 0;
+    }
+
+    public void setXpath(String xpath) {
+        this.xpath = xpath;
+    }
+    public String getXpath() {
+        return xpath;
+    }
+
+    /**
+     * sets the argument aVariable as the value of the rule
+     * @param aVariable the variable as a reference to the value
+     */
+    public void setAssignmentVariable(String aVariable){
+        this.assignmentVariable = aVariable;
+    }
+
+    public String getAssignmentVariable(){
+        return this.assignmentVariable;
+    }
+
+    public void setNamespace(String namespace) {
+        this.namespace = namespace;
+    }
+
+    public String getNamespace() {
+        return namespace;
+    }
+
+    public void setConstant(String constant) {
+        this.constant = constant;
+    }
+
+    public String getConstant() {
+        return constant;
+    }
+
+    /**
+     * @throws IllegalStateException if the rule already defines a variable
+     */
+    @Deprecated
+    public void setTargetField(String targetField) {
+        if (this.variable.length() > 0){
+            throw new IllegalStateException("Invalid rule definition: a rule is either defined as an output element or as a variable");
+        }
+        this.targetField = targetField;
+    }
+
+    /**
+     * @deprecated replaced by {@link #getUniqueName()}
+     */
+    @Deprecated
+    public String getTargetField() {
+        return targetField;
+    }
+
+    public void setRuleDeclaration(String ruleDeclaration) {
+        this.ruleDeclaration = ruleDeclaration;
+    }
+
+    public String getRuleDeclaration() {
+        return ruleDeclaration;
+    }
+
+    /**
+     * Orders rules by target field, variable, template, template match, rule declaration, namespace, constant and xpath.
+     * 0 is answered only if all of these fields are equal; unequal rules get a sign-symmetric order as Comparable requires.
+     * @see java.lang.Comparable#compareTo(java.lang.Object)
+     */
+    public int compareTo(Rules o) {
+        String[] mine = {this.targetField, this.variable, this.template, this.templateMatch,
+                this.ruleDeclaration, this.namespace, this.constant, this.xpath};
+        String[] theirs = {o.targetField, o.variable, o.template, o.templateMatch,
+                o.ruleDeclaration, o.namespace, o.constant, o.xpath};
+        for (int i = 0; i < mine.length; i++){
+            int order = compareText(mine[i], theirs[i]);
+            if (order != 0) return order;
+        }
+        return 0;
+    }
+
+    /** null-safe text comparison, null sorts first */
+    private static int compareText(String a, String b){
+        if (a == null) return (b == null) ? 0 : -1;
+        return (b == null) ? 1 : a.compareTo(b);
+    }
+
+    public void setFunctionCall(FunctionCall funcCall) {
+        this.funcCall = funcCall;
+    }
+
+    public FunctionCall getFunctionCall() {
+        return funcCall;
+    }
+
+    @Override
+    public String getUniqueName() {
+        if (this.definesVariable()) return this.variable;
+        else if (this.definesTemplate()) return this.template;
+        return this.targetField;
+    }
+
+    /** @return true if a condition has been set, false otherwise */
+    @Override
+    public boolean hasCondition() {
+        return condition != null;
+    }
+
+    /**
+     * @return the condition
+     */
+    public Condition getCondition() {
+        return condition;
+    }
+
+    /**
+     * @param condition the condition to set
+     */
+    public void setCondition(Condition condition) {
+        this.condition = condition;
+    }
+
+    /**
+     * @param variable the variable to set
+     */
+    public void setVariable(String variable) {
+        if (this.targetField.length() > 0){
+            throw new IllegalStateException("Invalid rule definition: a rule is either defined as an output element or as a variable");
+        }
+        this.variable = variable;
+    }
+
+    /**
+     * @return the variable
+     */
+    public String getVariable() {
+        return variable;
+    }
+
+    /**
+     * @param isEmpty the isEmpty to set
+     */
+    public void setEmpty(boolean isEmpty) {
+        this.isEmpty = isEmpty;
+    }
+
+    /**
+     * @return the isEmpty
+     */
+    public boolean isEmpty() {
+        return isEmpty;
+    }
+
+    /**
+     * @param targetSchemaElement the targetSchemaElement to set
+     */
+    public void setTargetSchemaElement(SchemaElement targetSchemaElement) {
+        this.targetSchemaElement = targetSchemaElement;
+    }
+
+    /**
+     * @return the targetSchemaElement
+     */
+    public SchemaElement getTargetSchemaElement() {
+        return targetSchemaElement;
+    }
+
+    /**
+     * @return the template
+     */
+    public String getTemplate() {
+        return template;
+    }
+
+    /**
+     * @param template the template to set
+     */
+    public void setTemplate(String template) {
+        this.template = template;
+    }
+
+    /**
+     * @return the attribute
+     */
+    public String getAttribute() {
+        return attribute;
+    }
+
+    /**
+     * @param attribute the attribute to set
+     */
+    public void setAttribute(String attribute) {
+        this.attribute = attribute;
+    }
+
+    /**
+     * @return the rulesSet
+     */
+    public RulesSet getRulesSet() {
+        return rulesSet;
+    }
+
+    /**
+     * @param rulesSet the rulesSet to set
+     */
+    public void setRulesSet(RulesSet rulesSet) {
+        this.rulesSet = rulesSet;
+    }
+
+    /* (non-Javadoc)
+     * @see eu.dnetlib.data.collective.transformation.rulelanguage.IRule#hasSet()
+     */
+    @Override
+    public boolean hasSet() {
+        return rulesSet != null;
+    }
+
+    public String getTemplateMatch() {
+        return templateMatch;
+    }
+
+    public void setTemplateMatch(String templateMatch) {
+        this.templateMatch = templateMatch;
+    }
+
+    public Properties getProperties() {
+        return properties;
+    }
+
+    public void setProperties(Properties properties) {
+        this.properties = properties;
+    }
+
+    public boolean isSkip() {
+        return isSkip;
+    }
+
+    public void setSkip(boolean isSkip) {
+        this.isSkip = isSkip;
+    }
+
+}
diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/RulesSet.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/RulesSet.java
new file mode 100644
index 0000000..46531db
--- /dev/null
+++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/RulesSet.java
@@ -0,0 +1,29 @@
+/**
+ * NOTE(review): the element type Rules of the list was lost in extraction and is inferred - confirm.
+ */
+package eu.dnetlib.data.collective.transformation.rulelanguage;
+
+import java.util.LinkedList;
+import java.util.List;
+
+/**
+ * Holds the list of pending rules of a rule.
+ * @author jochen
+ */
+public class RulesSet {
+
+    private List<Rules> pendingRules = new LinkedList<Rules>();
+    /**
+     * @return the pendingRules
+     */
+    public List<Rules> getPendingRules() {
+        return pendingRules;
+    }
+    /**
+     * @param pendingRules the pendingRules to set
+     */
+    public void setPendingRules(List<Rules> pendingRules) {
+        this.pendingRules = pendingRules;
+    }
+
+}
diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTMyAssign.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTMyAssign.java
new file mode 100644
index 0000000..cb73c76
--- /dev/null
+++
b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTMyAssign.java
@@ -0,0 +1,59 @@
+/* Generated By:JJTree: Do not edit this line. ASTMyAssign.java Version 4.3 */
+/* JavaCCOptions:MULTI=true,NODE_USES_PARSER=false,VISITOR=true,TRACK_TOKENS=false,NODE_PREFIX=AST,NODE_EXTENDS=,NODE_FACTORY=,SUPPORT_CLASS_VISIBILITY_PUBLIC=true */
+package eu.dnetlib.data.collective.transformation.rulelanguage.parser;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+/**
+ * AST node for the right-hand side of an assignment. It records either a field expression
+ * (see {@link #setFieldExpression(String)}) or a quoted literal whose quotes are removed
+ * (see {@link #setAttribute(String)}), together with a flag telling which one was set.
+ */
+public class ASTMyAssign extends AbstractNode {
+  private static final Log log = LogFactory.getLog(ASTMyAssign.class);
+  String value = "";
+  String field = "";
+  boolean isField = false;
+  boolean isAttribute = false;
+
+  public ASTMyAssign(int id) {
+    super(id);
+  }
+
+  public ASTMyAssign(FtScript p, int id) {
+    super(p, id);
+  }
+
+
+  /** Accept the visitor. **/
+  public Object jjtAccept(FtScriptVisitor visitor, Object data) {
+    return visitor.visit(this, data);
+  }
+
+  /**
+   * Stores the given literal without its first and last character (the enclosing quotes) and
+   * marks this node as an attribute assignment.
+   *
+   * @param value the quoted literal; when it is shorter than two characters there is no
+   *        quote pair to remove and the stored value is left unchanged
+   */
+  public void setAttribute(String value) {
+    // Two characters are the minimum for an opening and a closing quote; a shorter input
+    // would make substring(1, length - 1) fail with an index error.
+    if (value.length() >= 2) {
+      this.value = value.substring(1, value.length() - 1);
+      log.debug("attribute unquoted: " + this.value);
+    }
+    this.isAttribute = true;
+  }
+
+  /**
+   * Stores the field expression as given and marks this node as a field assignment.
+   *
+   * @param field the field expression
+   */
+  public void setFieldExpression(String field) {
+    this.field = field;
+    this.isField = true;
+  }
+
+  /** @return true once {@link #setFieldExpression(String)} has been called */
+  public boolean isFieldExpression() {
+    return isField;
+  }
+
+  /** @return true once {@link #setAttribute(String)} has been called */
+  public boolean isAttribute() {
+    return isAttribute;
+  }
+
+  /** @return the field expression, or the empty string if none was set */
+  public String getFieldExpression() {
+    return this.field;
+  }
+
+  /** @return the unquoted literal, or the empty string if none was set */
+  public String getValue() {
+    return value;
+  }
+}
+/* JavaCC - OriginalChecksum=f78d0265ec643fa70ae75afa6b875501 (do not edit this line) */
diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTMyAttribute.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTMyAttribute.java
new file mode 100644
index 0000000..f5ec955
--- /dev/null
+++
b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTMyAttribute.java
@@ -0,0 +1,38 @@
+/* Generated By:JJTree: Do not edit this line. ASTMyAttribute.java Version 4.3 */
+/* JavaCCOptions:MULTI=true,NODE_USES_PARSER=false,VISITOR=true,TRACK_TOKENS=false,NODE_PREFIX=AST,NODE_EXTENDS=,NODE_FACTORY=,SUPPORT_CLASS_VISIBILITY_PUBLIC=true */
+package eu.dnetlib.data.collective.transformation.rulelanguage.parser;
+
+/**
+ * AST node for a single attribute. It can carry an attribute value and an input field; both
+ * are null until the corresponding setter is called.
+ */
+public class ASTMyAttribute extends SimpleNode {
+  private String value;
+  private String inputField;
+
+  public ASTMyAttribute(int id) {
+    super(id);
+  }
+
+  public ASTMyAttribute(FtScript p, int id) {
+    super(p, id);
+  }
+
+  /** Accept the visitor. **/
+  public Object jjtAccept(FtScriptVisitor visitor, Object data) {
+    return visitor.visit(this, data);
+  }
+
+  /** @param value the attribute value to store */
+  public void setAttributeValue(String value) {
+    this.value = value;
+  }
+
+  /** @return the stored attribute value, or null if none was set */
+  public String getAttributeValue() {
+    return this.value;
+  }
+
+  /** @return the stored input field, or null if none was set */
+  public String getAttributeInputField() {
+    return this.inputField;
+  }
+
+  /** @param inputField the input field to store */
+  public void setAttributeInputField(String inputField) {
+    this.inputField = inputField;
+  }
+}
+/* JavaCC - OriginalChecksum=13918b66ed87534be49661a37cadd261 (do not edit this line) */
diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTMyCondition.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTMyCondition.java
new file mode 100644
index 0000000..ceb1b21
--- /dev/null
+++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTMyCondition.java
@@ -0,0 +1,69 @@
+/* Generated By:JJTree: Do not edit this line.
ASTMyCondition.java Version 4.3 */
+/* JavaCCOptions:MULTI=true,NODE_USES_PARSER=false,VISITOR=true,TRACK_TOKENS=false,NODE_PREFIX=AST,NODE_EXTENDS=,NODE_FACTORY=,SUPPORT_CLASS_VISIBILITY_PUBLIC=true */
+package eu.dnetlib.data.collective.transformation.rulelanguage.parser;
+
+
+import eu.dnetlib.data.collective.transformation.rulelanguage.Rules;
+
+/**
+ * AST node for a conditional rule. It holds an apply expression, a conditional expression
+ * (both default to the empty string) and two rules, primary and secondary, which are null
+ * until set.
+ */
+public class ASTMyCondition extends SimpleNode {
+
+  String applyExpression = "";
+  String conditionalExpression = "";
+  Rules primaryRule;
+  Rules secondaryRule;
+
+  public ASTMyCondition(int id) {
+    super(id);
+  }
+
+  public ASTMyCondition(FtScript p, int id) {
+    super(p, id);
+  }
+
+
+  /** Accept the visitor. **/
+  public Object jjtAccept(FtScriptVisitor visitor, Object data) {
+    return visitor.visit(this, data);
+  }
+
+  /** @return the applyExpression */
+  public String getApplyExpression() {
+    return this.applyExpression;
+  }
+
+  /** @param applyExpression the applyExpression to set */
+  public void setApplyExpression(String applyExpression) {
+    this.applyExpression = applyExpression;
+  }
+
+  /** @return the conditionalExpression */
+  public String getConditionalExpression() {
+    return this.conditionalExpression;
+  }
+
+  /** @param conditionalExpression the conditionalExpression to set */
+  public void setConditionalExpression(String conditionalExpression) {
+    this.conditionalExpression = conditionalExpression;
+  }
+
+  /** @return the primary rule, or null if none was set */
+  public Rules getPrimaryRule() {
+    return this.primaryRule;
+  }
+
+  /** @param primaryRule the primary rule to set */
+  public void setPrimaryRule(Rules primaryRule) {
+    this.primaryRule = primaryRule;
+  }
+
+  /** @return the secondary rule, or null if none was set */
+  public Rules getSecondaryRule() {
+    return this.secondaryRule;
+  }
+
+  /** @param secondaryRule the secondary rule to set */
+  public void setSecondaryRule(Rules secondaryRule) {
+    this.secondaryRule = secondaryRule;
+  }
+
+
+}
+/* JavaCC - OriginalChecksum=952b24322923d1de519b8698f1217414 (do not edit this line) */
diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTMyCopy.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTMyCopy.java
new file mode 100644
index 0000000..8c04c56
--- /dev/null
+++
b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTMyCopy.java
@@ -0,0 +1,47 @@
+/* Generated By:JJTree: Do not edit this line. ASTMyCopy.java Version 4.3 */
+/* JavaCCOptions:MULTI=true,NODE_USES_PARSER=false,VISITOR=true,TRACK_TOKENS=false,NODE_PREFIX=AST,NODE_EXTENDS=,NODE_FACTORY=,SUPPORT_CLASS_VISIBILITY_PUBLIC=true */
+package eu.dnetlib.data.collective.transformation.rulelanguage.parser;
+
+import eu.dnetlib.data.collective.transformation.rulelanguage.util.Converter;
+
+/**
+ * AST node for a copy statement. It keeps three strings (template match name, apply-template
+ * select expression, copy select expression), each passed through
+ * {@code Converter.getUnquotedString} before being stored; all default to the empty string.
+ */
+public
+class ASTMyCopy extends AbstractNode {
+
+  private String templateMatchName = "";
+  private String applyTemplateSelectExpression = "";
+  private String copySelectExpression = "";
+
+  public ASTMyCopy(int id) {
+    super(id);
+  }
+
+  public ASTMyCopy(FtScript p, int id) {
+    super(p, id);
+  }
+
+
+  /** Accept the visitor. **/
+  public Object jjtAccept(FtScriptVisitor visitor, Object data) {
+    return visitor.visit(this, data);
+  }
+
+  /**
+   * Records the three arguments of the copy statement after unquoting each of them.
+   *
+   * @param templateMatchName the template match name as written in the script
+   * @param applyTemplateSelectExpression the select expression for apply-templates
+   * @param copySelectExpression the select expression for the copy
+   */
+  public void copy(String templateMatchName, String applyTemplateSelectExpression, String copySelectExpression) {
+    String matchName = Converter.getUnquotedString(templateMatchName);
+    this.templateMatchName = matchName;
+    String applySelect = Converter.getUnquotedString(applyTemplateSelectExpression);
+    this.applyTemplateSelectExpression = applySelect;
+    String copySelect = Converter.getUnquotedString(copySelectExpression);
+    this.copySelectExpression = copySelect;
+  }
+
+  /** @return the unquoted template match name */
+  public String getTemplateMatchName() {
+    return this.templateMatchName;
+  }
+
+  /** @return the unquoted apply-template select expression */
+  public String getApplyTemplateSelectExpression() {
+    return this.applyTemplateSelectExpression;
+  }
+
+  /** @return the unquoted copy select expression */
+  public String getCopySelectExpression() {
+    return this.copySelectExpression;
+  }
+
+}
+/* JavaCC - OriginalChecksum=0d1889e307d1bb558c977ae924f6bb37 (do not edit this line) */
diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTMyEmpty.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTMyEmpty.java
new file mode 100644
index 0000000..13b3fb8
--- /dev/null
+++
b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTMyEmpty.java
@@ -0,0 +1,32 @@
+/* Generated By:JJTree: Do not edit this line. ASTMyEmpty.java Version 4.3 */
+/* JavaCCOptions:MULTI=true,NODE_USES_PARSER=false,VISITOR=true,TRACK_TOKENS=false,NODE_PREFIX=AST,NODE_EXTENDS=,NODE_FACTORY=,SUPPORT_CLASS_VISIBILITY_PUBLIC=true */
+package eu.dnetlib.data.collective.transformation.rulelanguage.parser;
+
+/**
+ * AST node carrying a single boolean "empty" flag, false until set.
+ */
+public
+class ASTMyEmpty extends AbstractNode {
+
+  boolean isEmpty = false;
+
+  public ASTMyEmpty(int id) {
+    super(id);
+  }
+
+  public ASTMyEmpty(FtScript p, int id) {
+    super(p, id);
+  }
+
+
+  /** Accept the visitor. **/
+  public Object jjtAccept(FtScriptVisitor visitor, Object data) {
+    return visitor.visit(this, data);
+  }
+
+  /** @return the current value of the empty flag */
+  public boolean isEmpty() {
+    return this.isEmpty;
+  }
+
+  /** @param aIsEmpty the new value of the empty flag */
+  public void setEmpty(boolean aIsEmpty) {
+    this.isEmpty = aIsEmpty;
+  }
+}
+/* JavaCC - OriginalChecksum=3d979737222aaa0a33a774b3e718705f (do not edit this line) */
diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTMyImport.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTMyImport.java
new file mode 100644
index 0000000..da2a331
--- /dev/null
+++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTMyImport.java
@@ -0,0 +1,32 @@
+/* Generated By:JJTree: Do not edit this line. ASTMyImport.java Version 4.3 */
+/* JavaCCOptions:MULTI=true,NODE_USES_PARSER=false,VISITOR=true,TRACK_TOKENS=false,NODE_PREFIX=AST,NODE_EXTENDS=,NODE_FACTORY=,SUPPORT_CLASS_VISIBILITY_PUBLIC=true */
+package eu.dnetlib.data.collective.transformation.rulelanguage.parser;
+
+/**
+ * AST node for an import statement; it stores the name of the imported script exactly as
+ * handed in (empty string until set).
+ */
+public class ASTMyImport extends SimpleNode {
+
+  private String scriptName = "";
+
+  public ASTMyImport(int id) {
+    super(id);
+  }
+
+  public ASTMyImport(FtScript p, int id) {
+    super(p, id);
+  }
+
+
+  /** Accept the visitor. **/
+  public Object jjtAccept(FtScriptVisitor visitor, Object data) {
+    return visitor.visit(this, data);
+  }
+
+  /** @param scriptName the script name to store */
+  public void setScriptName(String scriptName) {
+    this.scriptName = scriptName;
+  }
+
+  /** @return the stored script name */
+  public String getScriptName() {
+    return this.scriptName;
+  }
+
+}
+/* JavaCC - OriginalChecksum=ab107c30c540374469a393ab442757d5 (do not edit this line) */
diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTMyNs.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTMyNs.java
new file mode 100644
index 0000000..1322540
--- /dev/null
+++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTMyNs.java
@@ -0,0 +1,42 @@
+/* Generated By:JJTree: Do not edit this line. ASTMyNs.java Version 4.3 */
+/* JavaCCOptions:MULTI=true,NODE_USES_PARSER=false,VISITOR=true,TRACK_TOKENS=false,NODE_PREFIX=AST,NODE_EXTENDS=,NODE_FACTORY=,SUPPORT_CLASS_VISIBILITY_PUBLIC=true */
+package eu.dnetlib.data.collective.transformation.rulelanguage.parser;
+
+/**
+ * AST node for a namespace declaration: a prefix and the URI it is bound to.
+ *
+ * @author jochen
+ */
+public class ASTMyNs extends SimpleNode {
+
+  private String nsPrefix;
+  private String nsUri;
+
+  public ASTMyNs(int id) {
+    super(id);
+  }
+
+  public ASTMyNs(FtScript p, int id) {
+    super(p, id);
+  }
+
+
+  /** Accept the visitor. **/
+  public Object jjtAccept(FtScriptVisitor visitor, Object data) {
+    return visitor.visit(this, data);
+  }
+
+  /**
+   * Stores the prefix as given and the URI as the text between the first and the last double
+   * quote of {@code aNsUri}.
+   *
+   * @param aNsPrefix the namespace prefix
+   * @param aNsUri the namespace URI enclosed in double quotes
+   */
+  public void setNsDeclaration(String aNsPrefix, String aNsUri) {
+    this.nsPrefix = aNsPrefix;
+    int afterOpeningQuote = aNsUri.indexOf("\"") + 1;
+    int closingQuote = aNsUri.lastIndexOf("\"");
+    this.nsUri = aNsUri.substring(afterOpeningQuote, closingQuote);
+  }
+
+  /** @return the namespace prefix, or null if no declaration was set */
+  public String getNsPrefix() {
+    return this.nsPrefix;
+  }
+
+  /** @return the namespace URI without its quotes, or null if no declaration was set */
+  public String getNsUri() {
+    return this.nsUri;
+  }
+
+}
+/* JavaCC - OriginalChecksum=9d6d617b7c3f22f3603fcbd13f738170 (do not edit this line) */
diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTMyOp.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTMyOp.java
new file mode 100644
index 0000000..6d4dd51
--- /dev/null
+++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTMyOp.java
@@ -0,0 +1,249 @@
+/* Generated By:JJTree: Do not edit this line. ASTMyOp.java Version 4.3 */
+/* JavaCCOptions:MULTI=true,NODE_USES_PARSER=false,VISITOR=true,TRACK_TOKENS=false,NODE_PREFIX=AST,NODE_EXTENDS=,NODE_FACTORY=,SUPPORT_CLASS_VISIBILITY_PUBLIC=true */
+package eu.dnetlib.data.collective.transformation.rulelanguage.parser;
+
+import java.util.LinkedHashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+
+import eu.dnetlib.data.collective.transformation.engine.functions.Convert;
+import eu.dnetlib.data.collective.transformation.engine.functions.Extract;
+import eu.dnetlib.data.collective.transformation.engine.functions.IdentifierExtract;
+import eu.dnetlib.data.collective.transformation.engine.functions.Lookup;
+import eu.dnetlib.data.collective.transformation.engine.functions.RegularExpression;
+import eu.dnetlib.data.collective.transformation.engine.functions.Split;
+import eu.dnetlib.data.collective.transformation.rulelanguage.Rules;
+import eu.dnetlib.data.collective.transformation.rulelanguage.util.Converter;
+import
eu.dnetlib.data.collective.transformation.rulelanguage.util.FunctionCall;
+
+
+/**
+ * AST node for a function invocation in the rule language. Each configuring method
+ * (getValue, convert, extract, regExpr, split, lookup, concat, identifierExtract) records the
+ * name of the external function and its parameters; {@link #createFunctionCall(boolean)}
+ * packages what was recorded into a FunctionCall.
+ *
+ * @author jochen
+ */
+public class ASTMyOp extends AbstractNode {
+
+  private boolean isFunGetValue = false;
+  private boolean isFunConvert = false;
+  private boolean isFunConvertString = false;
+  private boolean isFunRegExpr = false;
+  private boolean isFunExtract = false;
+  private boolean isFunSplit = false;
+  private boolean isFunLookup = false;
+  private boolean isFunConcat = false;
+  private boolean isFunIdentifierExtract = false;
+  private boolean doPreprocess = true;
+  private String functionName = null;
+
+  private Map paramMap = null;
+
+  private List rulesList = new LinkedList();
+  private List concatList = new LinkedList();
+
+  private String externalFunctionName = null;
+
+  public ASTMyOp(int id) {
+    super(id);
+  }
+
+  public ASTMyOp(FtScript p, int id) {
+    super(p, id);
+  }
+
+
+  /** Accept the visitor. **/
+  public Object jjtAccept(FtScriptVisitor visitor, Object data) {
+    return visitor.visit(this, data);
+  }
+
+  /**
+   * Configures this node as a call of the external function "getValue".
+   *
+   * @param functionName the name stored under the parameter key "functionName"
+   */
+  public void getValue(String functionName) {
+    this.functionName = functionName;
+    this.isFunGetValue = true;
+    this.externalFunctionName = "getValue";
+    paramMap = new LinkedHashMap();
+    paramMap.put("functionName", functionName);
+  }
+
+  /** @return the function name given to {@link #getValue(String)}, or null */
+  public String getFunctionName() {
+    return functionName;
+  }
+
+  /**
+   * Creates a new FunctionCall object from the function name, parameter map and concat list
+   * recorded on this node.
+   *
+   * @param aIsStatic passed on to the FunctionCall constructor
+   * @return a function call object
+   */
+  public FunctionCall createFunctionCall(boolean aIsStatic) {
+    FunctionCall fc = new FunctionCall(aIsStatic, this.doPreprocess);
+    fc.setExternalFunctionName(externalFunctionName);
+    fc.setParameters(paramMap);
+    fc.setParamList(concatList);
+    return fc;
+  }
+
+  public boolean isGetValue() {
+    return isFunGetValue;
+  }
+
+  public boolean isConvert() {
+    return isFunConvert;
+  }
+
+  /**
+   * @return true when {@link #convert(String, String, String, String)} selected the
+   *         "convertString" variant
+   */
+  public boolean isConvertString() {
+    // Answer from the flag; calling isConvertString() here again would never terminate.
+    return isFunConvertString;
+  }
+
+  public boolean isRegExpr() {
+    return isFunRegExpr;
+  }
+
+  public boolean isExtract() {
+    return isFunExtract;
+  }
+
+  public boolean isFunSplit() {
+    return isFunSplit;
+  }
+
+  public boolean isLookup() {
+    return isFunLookup;
+  }
+
+  public boolean isConcat() {
+    return isFunConcat;
+  }
+
+  /**
+   * Configures this node as a conversion. The input field is evaluated first: when its xpath
+   * returns a string the external function is "convertString", otherwise "convert".
+   *
+   * @param inputField the xpath expression of the input field
+   * @param vocabulary the vocabulary name
+   * @param defaultPattern a double-quoted default pattern, or blank to omit the parameter
+   * @param function a double-quoted function name, or blank to omit the parameter
+   */
+  public void convert(String inputField, String vocabulary, String defaultPattern, String function) {
+    String converterInputField = Converter.getXpathFromXpathExpr(inputField);
+    if (Converter.isXpathReturningString(converterInputField)) {
+      this.externalFunctionName = "convertString";
+      this.isFunConvertString = true;
+    } else {
+      this.externalFunctionName = "convert";
+      this.isFunConvert = true;
+    }
+    this.doPreprocess = false;
+    paramMap = new LinkedHashMap();
+    paramMap.put(Convert.paramFieldValue, converterInputField);
+    paramMap.put(Convert.paramVocabularyName, vocabulary);
+    if (defaultPattern.trim().length() > 0) {
+      paramMap.put(Convert.paramDefaultPattern, betweenQuotes(defaultPattern));
+    }
+    if (function.trim().length() > 0) {
+      paramMap.put(Convert.paramFunction, betweenQuotes(function));
+    }
+  }
+
+  /**
+   * Configures this node as a call of the external function "extract".
+   *
+   * @param feature the feature name, stored as given
+   */
+  public void extract(String feature) {
+    this.isFunExtract = true;
+    this.externalFunctionName = "extract";
+    paramMap = new LinkedHashMap();
+    paramMap.put(Extract.paramNameFeature, feature);
+  }
+
+  /**
+   * Configures this node as a call of the external function "regExpr".
+   *
+   * @param inputExpr1 first input; converted to an xpath when it starts with "xpath:"
+   * @param inputExpr2 second input; converted to an xpath when it starts with "xpath:"
+   * @param regularExpr the double-quoted regular expression
+   */
+  public void regExpr(String inputExpr1, String inputExpr2, String regularExpr) {
+    this.isFunRegExpr = true;
+    this.externalFunctionName = "regExpr";
+    this.doPreprocess = false;
+    paramMap = new LinkedHashMap();
+    paramMap.put(RegularExpression.paramExpr1, resolveXpath(inputExpr1));
+    paramMap.put(RegularExpression.paramExpr2, resolveXpath(inputExpr2));
+    paramMap.put(RegularExpression.paramRegularExpr, betweenQuotes(regularExpr));
+  }
+
+  /** @return the parameter map of the last configured function, or null if none */
+  public Map getParamMap() {
+    return paramMap;
+  }
+
+  /**
+   * split values of an element into multiple elements
+   *
+   * @param inputValue the input value; converted to an xpath when it starts with "xpath:"
+   * @param elementName the double-quoted name of the target element
+   * @param regularExpr the double-quoted regular expression (delimiter)
+   */
+  public void split(String inputValue, String elementName, String regularExpr) {
+    this.isFunSplit = true;
+    this.doPreprocess = false;
+    this.externalFunctionName = "split";
+    paramMap = new LinkedHashMap();
+    paramMap.put(Split.paramInputExpr, resolveXpath(inputValue));
+    paramMap.put(Split.paramRegExpr, betweenQuotes(regularExpr));
+    paramMap.put(Split.paramElementName, betweenQuotes(elementName));
+  }
+
+  /** @param aRule the rule to append to this node's rule list */
+  public void addRule(Rules aRule) {
+    rulesList.add(aRule);
+  }
+
+  /**
+   * set parameters for the lookup function
+   *
+   * @param aExprId the identifier expression; converted to an xpath when it starts with
+   *        "xpath:"
+   * @param aExprProperty the quoted property expression; stored without its first and last
+   *        character, and skipped when shorter than two characters
+   */
+  public void lookup(String aExprId, String aExprProperty) {
+    this.isFunLookup = true;
+    this.doPreprocess = false;
+    this.externalFunctionName = "lookup";
+    paramMap = new LinkedHashMap();
+    paramMap.put(Lookup.paramExprIdentifier, resolveXpath(aExprId));
+    // unquote: two characters are the minimum for a quote pair; a single character would
+    // make substring(1, length - 1) fail with an index error
+    if (aExprProperty.length() >= 2) {
+      paramMap.put(Lookup.paramExprProperty, aExprProperty.substring(1, aExprProperty.length() - 1));
+    }
+  }
+
+  /** Configures this node as a call of the external function "concat". */
+  public void concat() {
+    this.isFunConcat = true;
+    this.doPreprocess = true;
+    this.externalFunctionName = "concat";
+  }
+
+  /** @param aTerm the term to append to the concat list */
+  public void addConcat(String aTerm) {
+    concatList.add(aTerm);
+  }
+
+  /**
+   * Configures this node as a call of the external function "identifierExtract".
+   *
+   * @param aXpathExprJsonString the quoted JSON string of xpath expressions
+   * @param aXpathExprInSource the source expression; converted to an xpath when it starts
+   *        with "xpath:"
+   * @param aRegExpr the quoted regular expression
+   */
+  public void identifierExtract(String aXpathExprJsonString, String aXpathExprInSource,
+      String aRegExpr) {
+    this.isFunIdentifierExtract = true;
+    this.doPreprocess = false;
+    this.externalFunctionName = "identifierExtract";
+    paramMap = new LinkedHashMap();
+    paramMap.put(IdentifierExtract.paramXpathExprInSource, resolveXpath(aXpathExprInSource));
+    paramMap.put(IdentifierExtract.paramXpathExprJson, Converter.getUnquotedString(aXpathExprJsonString));
+    paramMap.put(IdentifierExtract.paramRegExpr, Converter.getUnquotedString(aRegExpr));
+  }
+
+  /**
+   * @return the isFunIdentifierExtract
+   */
+  public boolean isFunIdentifierExtract() {
+    return isFunIdentifierExtract;
+  }
+
+  /** Returns the text between the first and the last double quote of the argument. */
+  private static String betweenQuotes(String quoted) {
+    return quoted.substring(quoted.indexOf("\"") + 1, quoted.lastIndexOf("\""));
+  }
+
+  /** Converts an expression that starts with "xpath:" to its xpath; returns others as given. */
+  private static String resolveXpath(String expr) {
+    if (expr.startsWith("xpath:")) {
+      return Converter.getXpathFromXpathExpr(expr);
+    }
+    return expr;
+  }
+
+}
+/* JavaCC - OriginalChecksum=3d515ff3345fb356c3993ac1bf1d77cc (do not edit this line) */
diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTMyPreprocess.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTMyPreprocess.java
new file mode 100644
index 0000000..3ec19a8
--- /dev/null
+++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTMyPreprocess.java
@@ -0,0 +1,56 @@
+/* Generated By:JJTree: Do not edit this line.
ASTMyPreprocess.java Version 4.3 */
+/* JavaCCOptions:MULTI=true,NODE_USES_PARSER=false,VISITOR=true,TRACK_TOKENS=false,NODE_PREFIX=AST,NODE_EXTENDS=,NODE_FACTORY=,SUPPORT_CLASS_VISIBILITY_PUBLIC=true */
+package eu.dnetlib.data.collective.transformation.rulelanguage.parser;
+
+/**
+ * AST node for a preprocess statement: an optional id, the name of the function to run and an
+ * optional parameter that is stored without its enclosing quotes.
+ */
+public class ASTMyPreprocess extends SimpleNode {
+  private String id;
+  private String funcName;
+  private String parameter;
+
+  public ASTMyPreprocess(int id) {
+    super(id);
+  }
+
+  public ASTMyPreprocess(FtScript p, int id) {
+    super(p, id);
+  }
+
+  /** Accept the visitor. **/
+  public Object jjtAccept(FtScriptVisitor visitor, Object data) {
+    return visitor.visit(this, data);
+  }
+
+  /** @return the function name, or null if none was set */
+  public String getFunctionName() {
+    return this.funcName;
+  }
+
+  /** @return the unquoted parameter, or null if none was set */
+  public String getParameter() {
+    return this.parameter;
+  }
+
+  /** @return the preprocess id, or null if none was set */
+  public String getId() {
+    return id;
+  }
+
+  /**
+   * Records id, function name and parameter of the statement.
+   *
+   * @param aPreprocessId the preprocess id
+   * @param aFunction the function name
+   * @param aParameter the quoted parameter; ignored when shorter than two characters
+   */
+  public void preprocess(String aPreprocessId, String aFunction, String aParameter) {
+    this.id = aPreprocessId;
+    this.funcName = aFunction;
+    storeUnquoted(aParameter);
+  }
+
+  /**
+   * Records function name and parameter of the statement.
+   *
+   * @param aFunction the function name
+   * @param aParameter the quoted parameter; ignored when shorter than two characters
+   */
+  public void preprocess(String aFunction, String aParameter) {
+    this.funcName = aFunction;
+    storeUnquoted(aParameter);
+  }
+
+  /**
+   * Records the function name of a statement without parameter.
+   *
+   * @param aFunction the function name
+   */
+  public void preprocess(String aFunction) {
+    this.funcName = aFunction;
+  }
+
+  /**
+   * Stores the parameter without its first and last character. Two characters are the minimum
+   * for a quote pair; shorter input leaves the stored parameter unchanged instead of failing
+   * in substring(1, length - 1).
+   */
+  private void storeUnquoted(String aParameter) {
+    if (aParameter.length() >= 2) {
+      this.parameter = aParameter.substring(1, aParameter.length() - 1);
+    }
+  }
+}
+/* JavaCC - OriginalChecksum=b9229360af18a53de1ce87664846e442 (do not edit this line) */
diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTMyScript.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTMyScript.java
new file mode 100644
index 0000000..ba7ec82
--- /dev/null
+++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTMyScript.java
@@ -0,0 +1,48 @@
+/* Generated By:JJTree: Do not edit this line.
ASTMyScript.java Version 4.3 */
+/* JavaCCOptions:MULTI=true,NODE_USES_PARSER=false,VISITOR=true,TRACK_TOKENS=false,NODE_PREFIX=AST,NODE_EXTENDS=,NODE_FACTORY=,SUPPORT_CLASS_VISIBILITY_PUBLIC=true */
+package eu.dnetlib.data.collective.transformation.rulelanguage.parser;
+
+/**
+ * AST node for a script declaration: the script name (without its quotes) and whether the
+ * script is a main script or a subscript.
+ */
+public class ASTMyScript extends SimpleNode {
+
+  public static enum SCRIPTTYPE{MAINSCRIPT, SUBSCRIPT};
+
+  private String scriptName = "";
+  private SCRIPTTYPE scriptType;
+
+  public ASTMyScript(int id) {
+    super(id);
+  }
+
+  public ASTMyScript(FtScript p, int id) {
+    super(p, id);
+  }
+
+
+  /** Accept the visitor. **/
+  public Object jjtAccept(FtScriptVisitor visitor, Object data) {
+    return visitor.visit(this, data);
+  }
+
+  /**
+   * Stores the text between the first and the last double quote of the argument.
+   *
+   * @param scriptName the script name enclosed in double quotes
+   */
+  public void setScript(String scriptName) {
+    int afterOpeningQuote = scriptName.indexOf("\"") + 1;
+    int closingQuote = scriptName.lastIndexOf("\"");
+    this.scriptName = scriptName.substring(afterOpeningQuote, closingQuote);
+  }
+
+  /** @return the script name without quotes, or the empty string if none was set */
+  public String getScript() {
+    return this.scriptName;
+  }
+
+  /** @param scriptType the scriptType to set */
+  public void setScriptType(SCRIPTTYPE scriptType) {
+    this.scriptType = scriptType;
+  }
+
+  /** @return the scriptType, or null if none was set */
+  public SCRIPTTYPE getScriptType() {
+    return this.scriptType;
+  }
+}
+/* JavaCC - OriginalChecksum=c9a44759b6c7b4b163c6b10f67226e91 (do not edit this line) */
diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTMySet.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTMySet.java
new file mode 100644
index 0000000..8e7707e
--- /dev/null
+++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTMySet.java
@@ -0,0 +1,68 @@
+/* Generated By:JJTree: Do not edit this line.
ASTMySet.java Version 4.3 */
+/* JavaCCOptions:MULTI=true,NODE_USES_PARSER=false,VISITOR=true,TRACK_TOKENS=false,NODE_PREFIX=AST,NODE_EXTENDS=,NODE_FACTORY=,SUPPORT_CLASS_VISIBILITY_PUBLIC=true */
+package eu.dnetlib.data.collective.transformation.rulelanguage.parser;
+
+import java.util.LinkedList;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import eu.dnetlib.data.collective.transformation.rulelanguage.Rules;
+
+/**
+ * This class is primarily designed to assign values to the element and attributes of the
+ * element. It collects attribute rules in a list and keeps an optional value expression.
+ *
+ * @author jochen
+ */
+public class ASTMySet extends AbstractNode {
+
+  private static final Log log = LogFactory.getLog(ASTMySet.class);
+  private List rulesList = new LinkedList();
+  private String expressionValue;
+  private String constValue;
+  private boolean isExpressionValue = false;
+
+
+  public ASTMySet(int id) {
+    super(id);
+  }
+
+  public ASTMySet(FtScript p, int id) {
+    super(p, id);
+  }
+
+  /** Accept the visitor. **/
+  public Object jjtAccept(FtScriptVisitor visitor, Object data) {
+    return visitor.visit(this, data);
+  }
+
+  /**
+   * Logs the rule at debug level and appends it to the rule list.
+   *
+   * @param aRule the attribute rule to add
+   */
+  public void addAttributeRule(Rules aRule) {
+    log.debug("called method: ASTMySet.addRule " + aRule.getUniqueName()+
+        " attribute: " + aRule.getAttribute() + " defines variable?: " + aRule.definesVariable());
+    this.rulesList.add(aRule);
+  }
+
+  /** @return the live list of rules added so far */
+  public List getRules() {
+    return this.rulesList;
+  }
+
+  /**
+   * Logs and stores the value expression and flags this node as carrying one.
+   *
+   * @param aExpression the value expression
+   */
+  public void setValueExpression(String aExpression) {
+    log.debug("expression: " + aExpression);
+    this.expressionValue = aExpression;
+    this.isExpressionValue = true;
+  }
+
+  /** @return the value expression, or null if none was set */
+  public String getValueExpression() {
+    return this.expressionValue;
+  }
+
+  /** @return true once {@link #setValueExpression(String)} has been called */
+  public boolean isValueExpression() {
+    return this.isExpressionValue;
+  }
+
+  /** @param aValue the constant value to keep; this class offers no getter for it */
+  public void setAttribute(String aValue) {
+    this.constValue = aValue;
+  }
+}
+/* JavaCC - OriginalChecksum=1a796456845c74ed0ee62389483ce5a7 (do not edit this line) */
diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTMySkip.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTMySkip.java
new file mode 100644
index 0000000..a297c7c
--- /dev/null
+++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTMySkip.java
@@ -0,0 +1,29 @@
+/* Generated By:JJTree: Do not edit this line. ASTMySkip.java Version 4.3 */
+/* JavaCCOptions:MULTI=true,NODE_USES_PARSER=false,VISITOR=true,TRACK_TOKENS=false,NODE_PREFIX=AST,NODE_EXTENDS=,NODE_FACTORY=,SUPPORT_CLASS_VISIBILITY_PUBLIC=true */
+package eu.dnetlib.data.collective.transformation.rulelanguage.parser;
+
+import eu.dnetlib.data.collective.transformation.rulelanguage.Rules;
+
+/**
+ * AST node for a skip statement. The node keeps no state of its own.
+ */
+public
+class ASTMySkip extends AbstractNode {
+  public ASTMySkip(int id) {
+    super(id);
+  }
+
+  public ASTMySkip(FtScript p, int id) {
+    super(p, id);
+  }
+
+
+  /** Accept the visitor. **/
+  public Object jjtAccept(FtScriptVisitor visitor, Object data) {
+    return visitor.visit(this, data);
+  }
+
+  /** Currently a no-op. */
+  public void skipRecord() {
+  }
+
+}
+/* JavaCC - OriginalChecksum=149f1dfbf543fde562b05c417044a68a (do not edit this line) */
diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTStart.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTStart.java
new file mode 100644
index 0000000..5925290
--- /dev/null
+++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ASTStart.java
@@ -0,0 +1,21 @@
+/* Generated By:JJTree: Do not edit this line. ASTStart.java Version 4.3 */
+/* JavaCCOptions:MULTI=true,NODE_USES_PARSER=false,VISITOR=true,TRACK_TOKENS=false,NODE_PREFIX=AST,NODE_EXTENDS=,NODE_FACTORY=,SUPPORT_CLASS_VISIBILITY_PUBLIC=true */
+package eu.dnetlib.data.collective.transformation.rulelanguage.parser;
+
+/**
+ * AST node created by the parser's Start production.
+ */
+public
+class ASTStart extends SimpleNode {
+  public ASTStart(int id) {
+    super(id);
+  }
+
+  public ASTStart(FtScript p, int id) {
+    super(p, id);
+  }
+
+
+  /** Accept the visitor. **/
+  public Object jjtAccept(FtScriptVisitor visitor, Object data) {
+    return visitor.visit(this, data);
+  }
+}
+/* JavaCC - OriginalChecksum=97710dc79a4caf565e6b2feba3f4fd69 (do not edit this line) */
diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/AbstractNode.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/AbstractNode.java
new file mode 100644
index 0000000..5b1a34d
--- /dev/null
+++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/AbstractNode.java
@@ -0,0 +1,42 @@
+/**
+ * 
+ */
+package eu.dnetlib.data.collective.transformation.rulelanguage.parser;
+
+import eu.dnetlib.data.collective.transformation.rulelanguage.Rules;
+
+/**
+ * Base class for AST nodes that have a rule attached; the rule is null until set.
+ *
+ * @author jochen
+ */
+public abstract class AbstractNode extends SimpleNode {
+
+  Rules rule;
+
+  /**
+   * @param i the node id handed to the SimpleNode constructor
+   */
+  public AbstractNode(int i) {
+    super(i);
+  }
+
+  /**
+   * @param p the parser handed to the SimpleNode constructor
+   * @param i the node id handed to the SimpleNode constructor
+   */
+  public AbstractNode(FtScript p, int i) {
+    super(p, i);
+  }
+
+  /** @return the attached rule, or null if none was set */
+  public Rules getRule() {
+    return this.rule;
+  }
+
+  /** @param rule the rule to attach */
+  public void setRule(Rules rule) {
+    this.rule = rule;
+  }
+
+
+}
diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/FtScript.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/FtScript.java
new file mode 100644
index 0000000..5480604
--- /dev/null
+++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/FtScript.java
@@ -0,0 +1,1333 @@
+/* Generated By:JJTree&JavaCC: Do not edit this line.
FtScript.java */ +package eu.dnetlib.data.collective.transformation.rulelanguage.parser; +import eu.dnetlib.data.collective.transformation.rulelanguage.Rules; +public class FtScript/*@bgen(jjtree)*/implements FtScriptTreeConstants, FtScriptConstants {/*@bgen(jjtree)*/ + protected JJTFtScriptState jjtree = new JJTFtScriptState(); + + final public ASTStart Start() throws ParseException { + /*@bgen(jjtree) Start */ + ASTStart jjtn000 = new ASTStart(JJTSTART); + boolean jjtc000 = true; + jjtree.openNodeScope(jjtn000); + try { + script(); + jjtree.closeNodeScope(jjtn000, true); + jjtc000 = false; + {if (true) return jjtn000;} + } catch (Throwable jjte000) { + if (jjtc000) { + jjtree.clearNodeScope(jjtn000); + jjtc000 = false; + } else { + jjtree.popNode(); + } + if (jjte000 instanceof RuntimeException) { + {if (true) throw (RuntimeException)jjte000;} + } + if (jjte000 instanceof ParseException) { + {if (true) throw (ParseException)jjte000;} + } + {if (true) throw (Error)jjte000;} + } finally { + if (jjtc000) { + jjtree.closeNodeScope(jjtn000, true); + } + } + throw new Error("Missing return statement in function"); + } + +/** + * id = value + */ + final public void assign(Rules r) throws ParseException { + /*@bgen(jjtree) MyAssign */ + ASTMyAssign jjtn000 = new ASTMyAssign(JJTMYASSIGN); + boolean jjtc000 = true; + jjtree.openNodeScope(jjtn000);String value = ""; Token t; + try { + jjtn000.setRule(r); + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case DOLLAR_QNAME: + case XPATH: + case JOBCONST: + t = inputField(); + jjtree.closeNodeScope(jjtn000, true); + jjtc000 = false; + jjtn000.setFieldExpression(t.image); + break; + case QUOTED_STRING: + value = quotedString(); + jjtree.closeNodeScope(jjtn000, true); + jjtc000 = false; + jjtn000.setAttribute(value); + break; + default: + jj_la1[0] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } + } catch (Throwable jjte000) { + if (jjtc000) { + jjtree.clearNodeScope(jjtn000); + jjtc000 = false; + } else { + 
jjtree.popNode(); + } + if (jjte000 instanceof RuntimeException) { + {if (true) throw (RuntimeException)jjte000;} + } + if (jjte000 instanceof ParseException) { + {if (true) throw (ParseException)jjte000;} + } + {if (true) throw (Error)jjte000;} + } finally { + if (jjtc000) { + jjtree.closeNodeScope(jjtn000, true); + } + } + } + + final public void attrib_list() throws ParseException { + jj_consume_token(LBRACKET); + label_1: + while (true) { + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case DOLLAR_QNAME: + case XPATH: + case JOBCONST: + case IDENTIFIER: + ; + break; + default: + jj_la1[1] = jj_gen; + break label_1; + } + attribute(); + } + jj_consume_token(RBRACKET); + } + + final public void attribute() throws ParseException { + /*@bgen(jjtree) MyAttribute */ + ASTMyAttribute jjtn000 = new ASTMyAttribute(JJTMYATTRIBUTE); + boolean jjtc000 = true; + jjtree.openNodeScope(jjtn000);String value; Token t=null; + try { + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case IDENTIFIER: + value = identifier(); + jjtn000.setAttributeValue(value); + break; + case DOLLAR_QNAME: + case XPATH: + case JOBCONST: + t = inputField(); + value = t.image; + jjtn000.setAttributeInputField(value); + break; + default: + jj_la1[2] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case COMMA: + jj_consume_token(COMMA); + break; + default: + jj_la1[3] = jj_gen; + ; + } + } catch (Throwable jjte000) { + if (jjtc000) { + jjtree.clearNodeScope(jjtn000); + jjtc000 = false; + } else { + jjtree.popNode(); + } + if (jjte000 instanceof RuntimeException) { + {if (true) throw (RuntimeException)jjte000;} + } + if (jjte000 instanceof ParseException) { + {if (true) throw (ParseException)jjte000;} + } + {if (true) throw (Error)jjte000;} + } finally { + if (jjtc000) { + jjtree.closeNodeScope(jjtn000, true); + } + } + } + +/** + * a conditional Rule, which contains the condition, the Rule on which the condition holds and the alternative rule + */ + 
final public void conditionalStmt() throws ParseException { + /*@bgen(jjtree) MyCondition */ + ASTMyCondition jjtn000 = new ASTMyCondition(JJTMYCONDITION); + boolean jjtc000 = true; + jjtree.openNodeScope(jjtn000);Rules r1 = new Rules(); Rules r2 = new Rules(); Token cond; Token apply; + try { + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case APPLY: + jj_consume_token(APPLY); + apply = inputField(); + jjtn000.setApplyExpression(apply.image); + break; + default: + jj_la1[4] = jj_gen; + ; + } + jj_consume_token(IF); + cond = inputField(); + jjtn000.setConditionalExpression(cond.image); jjtn000.setPrimaryRule(r1); jjtn000.setSecondaryRule(r2); + rule(r1); + jj_consume_token(ELSE); + rule(r2); + } catch (Throwable jjte000) { + if (jjtc000) { + jjtree.clearNodeScope(jjtn000); + jjtc000 = false; + } else { + jjtree.popNode(); + } + if (jjte000 instanceof RuntimeException) { + {if (true) throw (RuntimeException)jjte000;} + } + if (jjte000 instanceof ParseException) { + {if (true) throw (ParseException)jjte000;} + } + {if (true) throw (Error)jjte000;} + } finally { + if (jjtc000) { + jjtree.closeNodeScope(jjtn000, true); + } + } + } + + final public void empty(Rules r) throws ParseException { + /*@bgen(jjtree) MyEmpty */ + ASTMyEmpty jjtn000 = new ASTMyEmpty(JJTMYEMPTY); + boolean jjtc000 = true; + jjtree.openNodeScope(jjtn000); + try { + jjtn000.setRule(r); + jj_consume_token(EMPTY); + jjtree.closeNodeScope(jjtn000, true); + jjtc000 = false; + jjtn000.setEmpty(true); + } finally { + if (jjtc000) { + jjtree.closeNodeScope(jjtn000, true); + } + } + } + + final public String identifier() throws ParseException { + Token t; + t = jj_consume_token(IDENTIFIER); + {if (true) return t.image;} + throw new Error("Missing return statement in function"); + } + + final public void importDeclaration() throws ParseException { + /*@bgen(jjtree) MyImport */ + ASTMyImport jjtn000 = new ASTMyImport(JJTMYIMPORT); + boolean jjtc000 = true; + jjtree.openNodeScope(jjtn000);String scriptName; + 
try { + jj_consume_token(IMPORT); + jj_consume_token(LPAREN); + scriptName = identifier(); + jj_consume_token(RPAREN); + jj_consume_token(SEMICOLON); + jjtree.closeNodeScope(jjtn000, true); + jjtc000 = false; + jjtn000.setScriptName(scriptName); + } catch (Throwable jjte000) { + if (jjtc000) { + jjtree.clearNodeScope(jjtn000); + jjtc000 = false; + } else { + jjtree.popNode(); + } + if (jjte000 instanceof RuntimeException) { + {if (true) throw (RuntimeException)jjte000;} + } + if (jjte000 instanceof ParseException) { + {if (true) throw (ParseException)jjte000;} + } + {if (true) throw (Error)jjte000;} + } finally { + if (jjtc000) { + jjtree.closeNodeScope(jjtn000, true); + } + } + } + + final public Token anyExpression() throws ParseException { + Token t; + t = jj_consume_token(IDENTIFIER); + {if (true) return t;} + throw new Error("Missing return statement in function"); + } + + final public Token inputField() throws ParseException { + Token t; + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case XPATH: + t = jj_consume_token(XPATH); + {if (true) return t;} + break; + case JOBCONST: + t = jj_consume_token(JOBCONST); + {if (true) return t;} + break; + case DOLLAR_QNAME: + t = jj_consume_token(DOLLAR_QNAME); + {if (true) return t;} + break; + default: + jj_la1[5] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } + throw new Error("Missing return statement in function"); + } + +/* + * becomes obsolete + */ + final public String key() throws ParseException { + String key; + key = identifier(); + jj_consume_token(EQUAL); + {if (true) return key;} + throw new Error("Missing return statement in function"); + } + + final public void literal() throws ParseException { + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case CHARACTER_LITERAL: + jj_consume_token(CHARACTER_LITERAL); + break; + case STRING_LITERAL: + jj_consume_token(STRING_LITERAL); + break; + default: + jj_la1[6] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } + } + + final public 
void nsDeclaration() throws ParseException { + /*@bgen(jjtree) MyNs */ + ASTMyNs jjtn000 = new ASTMyNs(JJTMYNS); + boolean jjtc000 = true; + jjtree.openNodeScope(jjtn000);String nsPrefix; String nsUri; + try { + jj_consume_token(DECLARE_NAMESPACE); + nsPrefix = identifier(); + jj_consume_token(EQUAL); + nsUri = quotedString(); + jj_consume_token(SEMICOLON); + jjtree.closeNodeScope(jjtn000, true); + jjtc000 = false; + jjtn000.setNsDeclaration(nsPrefix, nsUri); + } catch (Throwable jjte000) { + if (jjtc000) { + jjtree.clearNodeScope(jjtn000); + jjtc000 = false; + } else { + jjtree.popNode(); + } + if (jjte000 instanceof RuntimeException) { + {if (true) throw (RuntimeException)jjte000;} + } + if (jjte000 instanceof ParseException) { + {if (true) throw (ParseException)jjte000;} + } + {if (true) throw (Error)jjte000;} + } finally { + if (jjtc000) { + jjtree.closeNodeScope(jjtn000, true); + } + } + } + + final public void preprocess() throws ParseException { + /*@bgen(jjtree) MyPreprocess */ + ASTMyPreprocess jjtn000 = new ASTMyPreprocess(JJTMYPREPROCESS); + boolean jjtc000 = true; + jjtree.openNodeScope(jjtn000);String preprocessId; + try { + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case PREPROCESS: + jj_consume_token(PREPROCESS); + preprocessId = identifier(); + jj_consume_token(EQUAL); + jj_consume_token(DBLOOKUP); + jj_consume_token(LPAREN); + String sqlExpr; + sqlExpr = quotedString(); + jj_consume_token(RPAREN); + jjtn000.preprocess(preprocessId, "dblookup", sqlExpr); + jj_consume_token(SEMICOLON); + break; + case BLACKLIST: + jj_consume_token(BLACKLIST); + jj_consume_token(LPAREN); + String blacklistDataSourceId; + blacklistDataSourceId = quotedString(); + jj_consume_token(RPAREN); + jjtn000.preprocess("blacklist", blacklistDataSourceId); + jj_consume_token(SEMICOLON); + break; + default: + jj_la1[7] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } + } catch (Throwable jjte000) { + if (jjtc000) { + jjtree.clearNodeScope(jjtn000); + jjtc000 = 
false; + } else { + jjtree.popNode(); + } + if (jjte000 instanceof RuntimeException) { + {if (true) throw (RuntimeException)jjte000;} + } + if (jjte000 instanceof ParseException) { + {if (true) throw (ParseException)jjte000;} + } + {if (true) throw (Error)jjte000;} + } finally { + if (jjtc000) { + jjtree.closeNodeScope(jjtn000, true); + } + } + } + + final public void set(Rules r) throws ParseException { + /*@bgen(jjtree) MySet */ + ASTMySet jjtn000 = new ASTMySet(JJTMYSET); + boolean jjtc000 = true; + jjtree.openNodeScope(jjtn000);Token expr; String value = ""; + try { + jjtn000.setRule(r); + jj_consume_token(SET); + jj_consume_token(LPAREN); + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case DOLLAR_QNAME: + case XPATH: + case JOBCONST: + expr = inputField(); + jjtn000.setValueExpression(expr.image); + break; + case QUOTED_STRING: + value = quotedString(); + jjtn000.setAttribute(value); + break; + default: + jj_la1[8] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } + label_2: + while (true) { + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case COMMA: + ; + break; + default: + jj_la1[9] = jj_gen; + break label_2; + } + jj_consume_token(COMMA); + r = new Rules(); + rule(r); + jjtn000.addAttributeRule(r); + } + jj_consume_token(RPAREN); + } catch (Throwable jjte000) { + if (jjtc000) { + jjtree.clearNodeScope(jjtn000); + jjtc000 = false; + } else { + jjtree.popNode(); + } + if (jjte000 instanceof RuntimeException) { + {if (true) throw (RuntimeException)jjte000;} + } + if (jjte000 instanceof ParseException) { + {if (true) throw (ParseException)jjte000;} + } + {if (true) throw (Error)jjte000;} + } finally { + if (jjtc000) { + jjtree.closeNodeScope(jjtn000, true); + } + } + } + + final public void skip(Rules r) throws ParseException { + /*@bgen(jjtree) MySkip */ + ASTMySkip jjtn000 = new ASTMySkip(JJTMYSKIP); + boolean jjtc000 = true; + jjtree.openNodeScope(jjtn000); + try { + jjtn000.setRule(r); + jj_consume_token(SKIPRECORD); + jjtn000.skipRecord(); + 
jj_consume_token(LPAREN); + jj_consume_token(RPAREN); + } finally { + if (jjtc000) { + jjtree.closeNodeScope(jjtn000, true); + } + } + } + + final public void copy(Rules r) throws ParseException { + /*@bgen(jjtree) MyCopy */ + ASTMyCopy jjtn000 = new ASTMyCopy(JJTMYCOPY); + boolean jjtc000 = true; + jjtree.openNodeScope(jjtn000); + try { + jjtn000.setRule(r); + jj_consume_token(COPY); + jj_consume_token(LPAREN); + String templateMatchExpression; String applyTemplateSelectExpression; String copySelectExpression; + templateMatchExpression = quotedString(); + jj_consume_token(COMMA); + applyTemplateSelectExpression = quotedString(); + jj_consume_token(COMMA); + copySelectExpression = quotedString(); + jj_consume_token(RPAREN); + jjtree.closeNodeScope(jjtn000, true); + jjtc000 = false; + jjtn000.copy(templateMatchExpression, applyTemplateSelectExpression, copySelectExpression); + } catch (Throwable jjte000) { + if (jjtc000) { + jjtree.clearNodeScope(jjtn000); + jjtc000 = false; + } else { + jjtree.popNode(); + } + if (jjte000 instanceof RuntimeException) { + {if (true) throw (RuntimeException)jjte000;} + } + if (jjte000 instanceof ParseException) { + {if (true) throw (ParseException)jjte000;} + } + {if (true) throw (Error)jjte000;} + } finally { + if (jjtc000) { + jjtree.closeNodeScope(jjtn000, true); + } + } + } + + final public void op(Rules r) throws ParseException { + /*@bgen(jjtree) MyOp */ + ASTMyOp jjtn000 = new ASTMyOp(JJTMYOP); + boolean jjtc000 = true; + jjtree.openNodeScope(jjtn000);String functionName = ""; Token expr; Token expr2; Token vocab; String defaultPattern = ""; String function = ""; String elementName = ""; String regExpr; Token feature; + try { + jjtn000.setRule(r); + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case GETVALUE: + jj_consume_token(GETVALUE); + jj_consume_token(LPAREN); + functionName = identifier(); + jj_consume_token(COMMA); + attrib_list(); + jj_consume_token(RPAREN); + jjtree.closeNodeScope(jjtn000, true); + jjtc000 = false; + 
jjtn000.getValue(functionName); + break; + case CONVERT: + jj_consume_token(CONVERT); + jj_consume_token(LPAREN); + expr = inputField(); + jj_consume_token(COMMA); + vocab = jj_consume_token(IDENTIFIER); + label_3: + while (true) { + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case COMMA: + ; + break; + default: + jj_la1[10] = jj_gen; + break label_3; + } + jj_consume_token(COMMA); + defaultPattern = quotedString(); + jj_consume_token(COMMA); + function = quotedString(); + } + jj_consume_token(RPAREN); + jjtree.closeNodeScope(jjtn000, true); + jjtc000 = false; + jjtn000.convert(expr.image, vocab.image, defaultPattern, function); + break; + case EXTRACT: + jj_consume_token(EXTRACT); + jj_consume_token(LPAREN); + feature = jj_consume_token(IDENTIFIER); + jj_consume_token(RPAREN); + jjtree.closeNodeScope(jjtn000, true); + jjtc000 = false; + jjtn000.extract(feature.image); + break; + case REGEXPR: + jj_consume_token(REGEXPR); + jj_consume_token(LPAREN); + expr = inputField(); + jj_consume_token(COMMA); + expr2 = inputField(); + jj_consume_token(COMMA); + regExpr = quotedString(); + jj_consume_token(RPAREN); + jjtree.closeNodeScope(jjtn000, true); + jjtc000 = false; + jjtn000.regExpr(expr.image, expr2.image, regExpr); + break; + case SPLIT: + jj_consume_token(SPLIT); + jj_consume_token(LPAREN); + expr = inputField(); + jj_consume_token(COMMA); + elementName = quotedString(); + jj_consume_token(COMMA); + regExpr = quotedString(); + jj_consume_token(RPAREN); + jjtree.closeNodeScope(jjtn000, true); + jjtc000 = false; + jjtn000.split(expr.image, elementName, regExpr); + break; + case LOOKUP: + jj_consume_token(LOOKUP); + String propertyKey; + jj_consume_token(LPAREN); + expr = inputField(); + jj_consume_token(COMMA); + propertyKey = quotedString(); + jj_consume_token(RPAREN); + jjtree.closeNodeScope(jjtn000, true); + jjtc000 = false; + jjtn000.lookup(expr.image, propertyKey); + break; + case IDENTIFIEREXTRACT: + jj_consume_token(IDENTIFIEREXTRACT); + String 
xpathExprJsonString; Token xpathExprInputSource; + jj_consume_token(LPAREN); + // "{//abc, //def }" ?json list of xpath-expr + // xpath-expr of input source + // regExpr + xpathExprJsonString = singleQuotedString(); + jj_consume_token(COMMA); + xpathExprInputSource = inputField(); + jj_consume_token(COMMA); + regExpr = singleQuotedString(); + jj_consume_token(RPAREN); + jjtree.closeNodeScope(jjtn000, true); + jjtc000 = false; + jjtn000.identifierExtract(xpathExprJsonString, xpathExprInputSource.image, regExpr); + break; + case CONCAT: + jj_consume_token(CONCAT); + jjtn000.concat(); + jj_consume_token(LPAREN); + String v; Token t; + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case QUOTED_STRING: + v = quotedString(); + jjtn000.addConcat(v); + break; + case DOLLAR_QNAME: + t = jj_consume_token(DOLLAR_QNAME); + jjtn000.addConcat(t.image); + break; + default: + jj_la1[11] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } + label_4: + while (true) { + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case COMMA: + ; + break; + default: + jj_la1[12] = jj_gen; + break label_4; + } + jj_consume_token(COMMA); + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case QUOTED_STRING: + v = quotedString(); + jjtn000.addConcat(v); + break; + case DOLLAR_QNAME: + t = jj_consume_token(DOLLAR_QNAME); + jjtn000.addConcat(t.image); + break; + default: + jj_la1[13] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } + } + jj_consume_token(RPAREN); + break; + default: + jj_la1[14] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } + } catch (Throwable jjte000) { + if (jjtc000) { + jjtree.clearNodeScope(jjtn000); + jjtc000 = false; + } else { + jjtree.popNode(); + } + if (jjte000 instanceof RuntimeException) { + {if (true) throw (RuntimeException)jjte000;} + } + if (jjte000 instanceof ParseException) { + {if (true) throw (ParseException)jjte000;} + } + {if (true) throw (Error)jjte000;} + } finally { + if (jjtc000) { + jjtree.closeNodeScope(jjtn000, true); 
+ } + } + } + + final public String leftExprOutputField() throws ParseException { + String leftExpr; + leftExpr = identifier(); + jj_consume_token(EQUAL); + {if (true) return leftExpr;} + throw new Error("Missing return statement in function"); + } + + final public String leftExprVar() throws ParseException { + String leftExpr; + leftExpr = variable(); + jj_consume_token(EQUAL); + {if (true) return leftExpr;} + throw new Error("Missing return statement in function"); + } + + final public String leftExprTemplate() throws ParseException { + String leftExpr; + leftExpr = template(); + jj_consume_token(EQUAL); + {if (true) return leftExpr;} + throw new Error("Missing return statement in function"); + } + + final public String singleQuotedString() throws ParseException { + Token t; + t = jj_consume_token(SINGLE_QUOTE); + {if (true) return t.image;} + throw new Error("Missing return statement in function"); + } + + final public String quotedString() throws ParseException { + Token t; + t = jj_consume_token(QUOTED_STRING); + {if (true) return t.image;} + throw new Error("Missing return statement in function"); + } + + final public void rule(Rules r) throws ParseException { + String ruleDecl; String leftExpr; + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case STATIC: + ruleDecl = ruleDecl(); + r.setRuleDeclaration(ruleDecl); + break; + default: + jj_la1[15] = jj_gen; + ; + } + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case IDENTIFIER: + leftExpr = leftExprOutputField(); + String[] fieldArray = leftExpr.split("@"); + r.setTargetField(fieldArray[0]); + if (fieldArray.length > 1) + { + r.setAttribute(fieldArray[1]); + } + break; + case DOLLAR_QNAME: + leftExpr = leftExprVar(); + r.setVariable(leftExpr); + break; + case PERCENT_QNAME: + leftExpr = leftExprTemplate(); + r.setTemplate(leftExpr); + break; + default: + jj_la1[16] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } + if (jj_2_1(2)) { + assign(r); + } else { + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + 
case EXTRACT: + case IDENTIFIEREXTRACT: + case CONVERT: + case REGEXPR: + case GETVALUE: + case SPLIT: + case LOOKUP: + case CONCAT: + op(r); + break; + case SET: + set(r); + break; + case COPY: + copy(r); + break; + case EMPTY: + empty(r); + break; + case SKIPRECORD: + skip(r); + break; + default: + jj_la1[17] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } + } + jj_consume_token(SEMICOLON); + } + + final public String ruleDecl() throws ParseException { + Token t; + t = jj_consume_token(STATIC); + {if (true) return t.image;} + throw new Error("Missing return statement in function"); + } + + final public void script() throws ParseException { + scriptDeclaration(); + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case IMPORT: + importDeclaration(); + break; + default: + jj_la1[18] = jj_gen; + ; + } + label_5: + while (true) { + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case DECLARE_NAMESPACE: + ; + break; + default: + jj_la1[19] = jj_gen; + break label_5; + } + nsDeclaration(); + } + label_6: + while (true) { + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case PREPROCESS: + case BLACKLIST: + ; + break; + default: + jj_la1[20] = jj_gen; + break label_6; + } + preprocess(); + } + label_7: + while (true) { + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case IF: + case STATIC: + case APPLY: + case DOLLAR_QNAME: + case PERCENT_QNAME: + case IDENTIFIER: + ; + break; + default: + jj_la1[21] = jj_gen; + break label_7; + } + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case STATIC: + case DOLLAR_QNAME: + case PERCENT_QNAME: + case IDENTIFIER: + stmt(); + break; + case IF: + case APPLY: + conditionalStmt(); + break; + default: + jj_la1[22] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } + } + jj_consume_token(END); + jj_consume_token(0); + } + + final public void scriptDeclaration() throws ParseException { + /*@bgen(jjtree) MyScript */ + ASTMyScript jjtn000 = new ASTMyScript(JJTMYSCRIPT); + boolean jjtc000 = true; + 
jjtree.openNodeScope(jjtn000);String scriptName; ASTMyScript.SCRIPTTYPE scriptType; + try { + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case DECLARE_SCRIPT: + jj_consume_token(DECLARE_SCRIPT); + jjtn000.setScriptType(ASTMyScript.SCRIPTTYPE.MAINSCRIPT); + break; + case DECLARE_SUBSCRIPT: + jj_consume_token(DECLARE_SUBSCRIPT); + jjtn000.setScriptType(ASTMyScript.SCRIPTTYPE.SUBSCRIPT); + break; + default: + jj_la1[23] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } + scriptName = quotedString(); + jj_consume_token(SEMICOLON); + jjtree.closeNodeScope(jjtn000, true); + jjtc000 = false; + jjtn000.setScript(scriptName); + } catch (Throwable jjte000) { + if (jjtc000) { + jjtree.clearNodeScope(jjtn000); + jjtc000 = false; + } else { + jjtree.popNode(); + } + if (jjte000 instanceof RuntimeException) { + {if (true) throw (RuntimeException)jjte000;} + } + if (jjte000 instanceof ParseException) { + {if (true) throw (ParseException)jjte000;} + } + {if (true) throw (Error)jjte000;} + } finally { + if (jjtc000) { + jjtree.closeNodeScope(jjtn000, true); + } + } + } + + final public void stmt() throws ParseException { + Rules r = new Rules(); + rule(r); + } + + final public String string() throws ParseException { + Token t; + t = jj_consume_token(STRING_LITERAL); + {if (true) return t.image.substring(1, t.image.length()-1);} + throw new Error("Missing return statement in function"); + } + + final public String variable() throws ParseException { + Token t; + t = jj_consume_token(DOLLAR_QNAME); + {if (true) return t.image;} + throw new Error("Missing return statement in function"); + } + + final public String template() throws ParseException { + Token t; + t = jj_consume_token(PERCENT_QNAME); + {if (true) return t.image;} + throw new Error("Missing return statement in function"); + } + + private boolean jj_2_1(int xla) { + jj_la = xla; jj_lastpos = jj_scanpos = token; + try { return !jj_3_1(); } + catch(LookaheadSuccess ls) { return true; } + finally { jj_save(0, 
xla); } + } + + private boolean jj_3R_12() { + if (jj_scan_token(QUOTED_STRING)) return true; + return false; + } + + private boolean jj_3R_11() { + Token xsp; + xsp = jj_scanpos; + if (jj_3R_13()) { + jj_scanpos = xsp; + if (jj_3R_14()) { + jj_scanpos = xsp; + if (jj_3R_15()) return true; + } + } + return false; + } + + private boolean jj_3R_13() { + if (jj_scan_token(XPATH)) return true; + return false; + } + + private boolean jj_3R_10() { + if (jj_3R_12()) return true; + return false; + } + + private boolean jj_3_1() { + if (jj_3R_8()) return true; + return false; + } + + private boolean jj_3R_8() { + Token xsp; + xsp = jj_scanpos; + if (jj_3R_9()) { + jj_scanpos = xsp; + if (jj_3R_10()) return true; + } + return false; + } + + private boolean jj_3R_15() { + if (jj_scan_token(DOLLAR_QNAME)) return true; + return false; + } + + private boolean jj_3R_9() { + if (jj_3R_11()) return true; + return false; + } + + private boolean jj_3R_14() { + if (jj_scan_token(JOBCONST)) return true; + return false; + } + + /** Generated Token Manager. */ + public FtScriptTokenManager token_source; + SimpleCharStream jj_input_stream; + /** Current token. */ + public Token token; + /** Next token. 
*/ + public Token jj_nt; + private int jj_ntk; + private Token jj_scanpos, jj_lastpos; + private int jj_la; + private int jj_gen; + final private int[] jj_la1 = new int[24]; + static private int[] jj_la1_0; + static private int[] jj_la1_1; + static private int[] jj_la1_2; + static { + jj_la1_init_0(); + jj_la1_init_1(); + jj_la1_init_2(); + } + private static void jj_la1_init_0() { + jj_la1_0 = new int[] {0x0,0x0,0x0,0x0,0x20000000,0x0,0x0,0x800000,0x0,0x0,0x0,0x0,0x0,0x0,0x407c000,0x8000000,0x0,0x470fc000,0x200,0x100000,0x800000,0x28000800,0x28000800,0x600000,}; + } + private static void jj_la1_init_1() { + jj_la1_1 = new int[] {0x16800000,0x6800000,0x6800000,0x80,0x0,0x6800000,0xc0000000,0x1,0x16800000,0x80,0x80,0x10800000,0x80,0x10800000,0x6,0x0,0x1800000,0x6,0x0,0x0,0x1,0x1800000,0x1800000,0x0,}; + } + private static void jj_la1_init_2() { + jj_la1_2 = new int[] {0x0,0x2,0x2,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x2,0x0,0x0,0x0,0x0,0x2,0x2,0x0,}; + } + final private JJCalls[] jj_2_rtns = new JJCalls[1]; + private boolean jj_rescan = false; + private int jj_gc = 0; + + /** Constructor with InputStream. */ + public FtScript(java.io.InputStream stream) { + this(stream, null); + } + /** Constructor with InputStream and supplied encoding */ + public FtScript(java.io.InputStream stream, String encoding) { + try { jj_input_stream = new SimpleCharStream(stream, encoding, 1, 1); } catch(java.io.UnsupportedEncodingException e) { throw new RuntimeException(e); } + token_source = new FtScriptTokenManager(jj_input_stream); + token = new Token(); + jj_ntk = -1; + jj_gen = 0; + for (int i = 0; i < 24; i++) jj_la1[i] = -1; + for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); + } + + /** Reinitialise. */ + public void ReInit(java.io.InputStream stream) { + ReInit(stream, null); + } + /** Reinitialise. 
*/ + public void ReInit(java.io.InputStream stream, String encoding) { + try { jj_input_stream.ReInit(stream, encoding, 1, 1); } catch(java.io.UnsupportedEncodingException e) { throw new RuntimeException(e); } + token_source.ReInit(jj_input_stream); + token = new Token(); + jj_ntk = -1; + jjtree.reset(); + jj_gen = 0; + for (int i = 0; i < 24; i++) jj_la1[i] = -1; + for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); + } + + /** Constructor. */ + public FtScript(java.io.Reader stream) { + jj_input_stream = new SimpleCharStream(stream, 1, 1); + token_source = new FtScriptTokenManager(jj_input_stream); + token = new Token(); + jj_ntk = -1; + jj_gen = 0; + for (int i = 0; i < 24; i++) jj_la1[i] = -1; + for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); + } + + /** Reinitialise. */ + public void ReInit(java.io.Reader stream) { + jj_input_stream.ReInit(stream, 1, 1); + token_source.ReInit(jj_input_stream); + token = new Token(); + jj_ntk = -1; + jjtree.reset(); + jj_gen = 0; + for (int i = 0; i < 24; i++) jj_la1[i] = -1; + for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); + } + + /** Constructor with generated Token Manager. */ + public FtScript(FtScriptTokenManager tm) { + token_source = tm; + token = new Token(); + jj_ntk = -1; + jj_gen = 0; + for (int i = 0; i < 24; i++) jj_la1[i] = -1; + for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); + } + + /** Reinitialise. 
*/ + public void ReInit(FtScriptTokenManager tm) { + token_source = tm; + token = new Token(); + jj_ntk = -1; + jjtree.reset(); + jj_gen = 0; + for (int i = 0; i < 24; i++) jj_la1[i] = -1; + for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); + } + + private Token jj_consume_token(int kind) throws ParseException { + Token oldToken; + if ((oldToken = token).next != null) token = token.next; + else token = token.next = token_source.getNextToken(); + jj_ntk = -1; + if (token.kind == kind) { + jj_gen++; + if (++jj_gc > 100) { + jj_gc = 0; + for (int i = 0; i < jj_2_rtns.length; i++) { + JJCalls c = jj_2_rtns[i]; + while (c != null) { + if (c.gen < jj_gen) c.first = null; + c = c.next; + } + } + } + return token; + } + token = oldToken; + jj_kind = kind; + throw generateParseException(); + } + + static private final class LookaheadSuccess extends java.lang.Error { } + final private LookaheadSuccess jj_ls = new LookaheadSuccess(); + private boolean jj_scan_token(int kind) { + if (jj_scanpos == jj_lastpos) { + jj_la--; + if (jj_scanpos.next == null) { + jj_lastpos = jj_scanpos = jj_scanpos.next = token_source.getNextToken(); + } else { + jj_lastpos = jj_scanpos = jj_scanpos.next; + } + } else { + jj_scanpos = jj_scanpos.next; + } + if (jj_rescan) { + int i = 0; Token tok = token; + while (tok != null && tok != jj_scanpos) { i++; tok = tok.next; } + if (tok != null) jj_add_error_token(kind, i); + } + if (jj_scanpos.kind != kind) return true; + if (jj_la == 0 && jj_scanpos == jj_lastpos) throw jj_ls; + return false; + } + + +/** Get the next Token. */ + final public Token getNextToken() { + if (token.next != null) token = token.next; + else token = token.next = token_source.getNextToken(); + jj_ntk = -1; + jj_gen++; + return token; + } + +/** Get the specific Token. 
*/ + final public Token getToken(int index) { + Token t = token; + for (int i = 0; i < index; i++) { + if (t.next != null) t = t.next; + else t = t.next = token_source.getNextToken(); + } + return t; + } + + private int jj_ntk() { + if ((jj_nt=token.next) == null) + return (jj_ntk = (token.next=token_source.getNextToken()).kind); + else + return (jj_ntk = jj_nt.kind); + } + + private java.util.List jj_expentries = new java.util.ArrayList(); + private int[] jj_expentry; + private int jj_kind = -1; + private int[] jj_lasttokens = new int[100]; + private int jj_endpos; + + private void jj_add_error_token(int kind, int pos) { + if (pos >= 100) return; + if (pos == jj_endpos + 1) { + jj_lasttokens[jj_endpos++] = kind; + } else if (jj_endpos != 0) { + jj_expentry = new int[jj_endpos]; + for (int i = 0; i < jj_endpos; i++) { + jj_expentry[i] = jj_lasttokens[i]; + } + jj_entries_loop: for (java.util.Iterator it = jj_expentries.iterator(); it.hasNext();) { + int[] oldentry = (int[])(it.next()); + if (oldentry.length == jj_expentry.length) { + for (int i = 0; i < jj_expentry.length; i++) { + if (oldentry[i] != jj_expentry[i]) { + continue jj_entries_loop; + } + } + jj_expentries.add(jj_expentry); + break jj_entries_loop; + } + } + if (pos != 0) jj_lasttokens[(jj_endpos = pos) - 1] = kind; + } + } + + /** Generate ParseException. 
*/ + public ParseException generateParseException() { + jj_expentries.clear(); + boolean[] la1tokens = new boolean[70]; + if (jj_kind >= 0) { + la1tokens[jj_kind] = true; + jj_kind = -1; + } + for (int i = 0; i < 24; i++) { + if (jj_la1[i] == jj_gen) { + for (int j = 0; j < 32; j++) { + if ((jj_la1_0[i] & (1< jj_gen) { + jj_la = p.arg; jj_lastpos = jj_scanpos = p.first; + switch (i) { + case 0: jj_3_1(); break; + } + } + p = p.next; + } while (p != null); + } catch(LookaheadSuccess ls) { } + } + jj_rescan = false; + } + + private void jj_save(int index, int xla) { + JJCalls p = jj_2_rtns[index]; + while (p.gen > jj_gen) { + if (p.next == null) { p = p.next = new JJCalls(); break; } + p = p.next; + } + p.gen = jj_gen + xla - jj_la; p.first = token; p.arg = xla; + } + + static final class JJCalls { + int gen; + Token first; + int arg; + JJCalls next; + } + +} diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/FtScriptConstants.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/FtScriptConstants.java new file mode 100644 index 0000000..e6a7dbb --- /dev/null +++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/FtScriptConstants.java @@ -0,0 +1,217 @@ +/* Generated By:JJTree&JavaCC: Do not edit this line. FtScriptConstants.java */ +package eu.dnetlib.data.collective.transformation.rulelanguage.parser; + + +/** + * Token literal values and constants. + * Generated by org.javacc.parser.OtherFilesGen#start() + */ +public interface FtScriptConstants { + + /** End of File. */ + int EOF = 0; + /** RegularExpression Id. */ + int SINGLE_LINE_COMMENT = 8; + /** RegularExpression Id. */ + int IMPORT = 9; + /** RegularExpression Id. */ + int TRANS = 10; + /** RegularExpression Id. */ + int IF = 11; + /** RegularExpression Id. */ + int ELSE = 12; + /** RegularExpression Id. */ + int END = 13; + /** RegularExpression Id. 
*/ + int EXTRACT = 14; + /** RegularExpression Id. */ + int IDENTIFIEREXTRACT = 15; + /** RegularExpression Id. */ + int CONVERT = 16; + /** RegularExpression Id. */ + int REGEXPR = 17; + /** RegularExpression Id. */ + int GETVALUE = 18; + /** RegularExpression Id. */ + int COPY = 19; + /** RegularExpression Id. */ + int DECLARE_NAMESPACE = 20; + /** RegularExpression Id. */ + int DECLARE_SCRIPT = 21; + /** RegularExpression Id. */ + int DECLARE_SUBSCRIPT = 22; + /** RegularExpression Id. */ + int PREPROCESS = 23; + /** RegularExpression Id. */ + int SET = 24; + /** RegularExpression Id. */ + int SKIPRECORD = 25; + /** RegularExpression Id. */ + int SPLIT = 26; + /** RegularExpression Id. */ + int STATIC = 27; + /** RegularExpression Id. */ + int XPATH_SCHEME = 28; + /** RegularExpression Id. */ + int APPLY = 29; + /** RegularExpression Id. */ + int EMPTY = 30; + /** RegularExpression Id. */ + int DBLOOKUP = 31; + /** RegularExpression Id. */ + int BLACKLIST = 32; + /** RegularExpression Id. */ + int LOOKUP = 33; + /** RegularExpression Id. */ + int CONCAT = 34; + /** RegularExpression Id. */ + int SEMICOLON = 35; + /** RegularExpression Id. */ + int RBRACKET = 36; + /** RegularExpression Id. */ + int LBRACE = 37; + /** RegularExpression Id. */ + int RBRACE = 38; + /** RegularExpression Id. */ + int COMMA = 39; + /** RegularExpression Id. */ + int RPAREN = 40; + /** RegularExpression Id. */ + int QUOTE = 41; + /** RegularExpression Id. */ + int EQUAL = 42; + /** RegularExpression Id. */ + int NOTEQUAL = 43; + /** RegularExpression Id. */ + int GT = 44; + /** RegularExpression Id. */ + int LT = 45; + /** RegularExpression Id. */ + int GTE = 46; + /** RegularExpression Id. */ + int LTE = 47; + /** RegularExpression Id. */ + int PLUS = 48; + /** RegularExpression Id. */ + int VBAR = 49; + /** RegularExpression Id. */ + int SLASH = 50; + /** RegularExpression Id. */ + int DOT_DOT = 51; + /** RegularExpression Id. */ + int AT = 52; + /** RegularExpression Id. 
*/ + int LPAREN = 53; + /** RegularExpression Id. */ + int LBRACKET = 54; + /** RegularExpression Id. */ + int DOLLAR_QNAME = 55; + /** RegularExpression Id. */ + int PERCENT_QNAME = 56; + /** RegularExpression Id. */ + int XPATH = 57; + /** RegularExpression Id. */ + int JOBCONST = 58; + /** RegularExpression Id. */ + int JOBCONST_PREFIX = 59; + /** RegularExpression Id. */ + int QUOTED_STRING = 60; + /** RegularExpression Id. */ + int SINGLE_QUOTE = 61; + /** RegularExpression Id. */ + int CHARACTER_LITERAL = 62; + /** RegularExpression Id. */ + int STRING_LITERAL = 63; + /** RegularExpression Id. */ + int URI = 64; + /** RegularExpression Id. */ + int IDENTIFIER = 65; + /** RegularExpression Id. */ + int LETTER_OR_DIGIT = 66; + /** RegularExpression Id. */ + int MINUS = 67; + /** RegularExpression Id. */ + int DOT = 68; + /** RegularExpression Id. */ + int COLON = 69; + + /** Lexical state. */ + int DEFAULT = 0; + /** Lexical state. */ + int IN_SINGLE_LINE_COMMENT = 1; + + /** Literal token values. 
*/ + String[] tokenImage = { + "", + "\" \"", + "\"\\t\"", + "\"\\n\"", + "\"\\r\"", + "", + "", + "\"//\"", + "", + "\"import\"", + "\"trans\"", + "\"if\"", + "\"else\"", + "\"end\"", + "\"Extract\"", + "\"identifierExtract\"", + "\"Convert\"", + "\"RegExpr\"", + "\"getValue\"", + "\"copy\"", + "\"declare_ns\"", + "\"declare_script\"", + "\"declare_subscript\"", + "\"preprocess\"", + "\"set\"", + "\"skipRecord\"", + "\"split\"", + "\"static\"", + "\"xpath:\"", + "\"apply\"", + "\"empty\"", + "\"dblookup\"", + "\"blacklist\"", + "\"lookup\"", + "\"concat\"", + "\";\"", + "\"]\"", + "\"{\"", + "\"}\"", + "\",\"", + "\")\"", + "\"\\\'\"", + "\"=\"", + "\"!=\"", + "", + "", + "", + "", + "\"+\"", + "\"|\"", + "\"/\"", + "\"..\"", + "\"@\"", + "\"(\"", + "\"[\"", + "", + "", + "", + "", + "\"$job.\"", + "", + "", + "", + "", + "", + "", + "", + "\"-\"", + "\".\"", + "\":\"", + }; + +} diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/FtScriptTokenManager.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/FtScriptTokenManager.java new file mode 100644 index 0000000..cca943e --- /dev/null +++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/FtScriptTokenManager.java @@ -0,0 +1,1992 @@ +/* Generated By:JJTree&JavaCC: Do not edit this line. FtScriptTokenManager.java */ +package eu.dnetlib.data.collective.transformation.rulelanguage.parser; +import eu.dnetlib.data.collective.transformation.rulelanguage.Rules; + +/** Token Manager. */ +public class FtScriptTokenManager implements FtScriptConstants +{ + + /** Debug output. */ + public java.io.PrintStream debugStream = System.out; + /** Set debug output. 
*/ + public void setDebugStream(java.io.PrintStream ds) { debugStream = ds; } +private final int jjStopStringLiteralDfa_0(int pos, long active0) +{ + switch (pos) + { + case 0: + if ((active0 & 0x7effffe00L) != 0L) + { + jjmatchedKind = 65; + return 95; + } + if ((active0 & 0x20000000000L) != 0L) + return 76; + if ((active0 & 0x10000000L) != 0L) + { + jjmatchedKind = 65; + return 19; + } + if ((active0 & 0x2L) != 0L) + return 44; + if ((active0 & 0x10000000000000L) != 0L) + return 95; + if ((active0 & 0x4000000000080L) != 0L) + return 28; + return -1; + case 1: + if ((active0 & 0x7effff600L) != 0L) + { + jjmatchedKind = 65; + jjmatchedPos = 1; + return 95; + } + if ((active0 & 0x10000000L) != 0L) + { + jjmatchedKind = 65; + jjmatchedPos = 1; + return 18; + } + if ((active0 & 0x80L) != 0L) + return 96; + if ((active0 & 0x800L) != 0L) + return 95; + return -1; + case 2: + if ((active0 & 0x7eeffd600L) != 0L) + { + jjmatchedKind = 65; + jjmatchedPos = 2; + return 95; + } + if ((active0 & 0x10000000L) != 0L) + { + jjmatchedKind = 65; + jjmatchedPos = 2; + return 17; + } + if ((active0 & 0x1002000L) != 0L) + return 95; + return -1; + case 3: + if ((active0 & 0x10000000L) != 0L) + { + jjmatchedKind = 65; + jjmatchedPos = 3; + return 16; + } + if ((active0 & 0x7eef7c600L) != 0L) + { + jjmatchedKind = 65; + jjmatchedPos = 3; + return 95; + } + if ((active0 & 0x81000L) != 0L) + return 95; + return -1; + case 4: + if ((active0 & 0x78af7c200L) != 0L) + { + jjmatchedKind = 65; + jjmatchedPos = 4; + return 95; + } + if ((active0 & 0x10000000L) != 0L) + { + jjmatchedKind = 65; + jjmatchedPos = 4; + return 12; + } + if ((active0 & 0x64000400L) != 0L) + return 95; + return -1; + case 5: + if ((active0 & 0x182f7c000L) != 0L) + { + jjmatchedKind = 65; + jjmatchedPos = 5; + return 95; + } + if ((active0 & 0x10000000L) != 0L) + return 13; + if ((active0 & 0x608000200L) != 0L) + return 95; + return -1; + case 6: + if ((active0 & 0x182f48000L) != 0L) + { + jjmatchedKind = 65; + 
jjmatchedPos = 6; + return 95; + } + if ((active0 & 0x34000L) != 0L) + return 95; + return -1; + case 7: + if ((active0 & 0x102f08000L) != 0L) + { + jjmatchedKind = 65; + jjmatchedPos = 7; + return 95; + } + if ((active0 & 0x80040000L) != 0L) + return 95; + return -1; + case 8: + if ((active0 & 0x2f08000L) != 0L) + { + jjmatchedKind = 65; + jjmatchedPos = 8; + return 95; + } + if ((active0 & 0x100000000L) != 0L) + return 95; + return -1; + case 9: + if ((active0 & 0x608000L) != 0L) + { + jjmatchedKind = 65; + jjmatchedPos = 9; + return 95; + } + if ((active0 & 0x2900000L) != 0L) + return 95; + return -1; + case 10: + if ((active0 & 0x608000L) != 0L) + { + jjmatchedKind = 65; + jjmatchedPos = 10; + return 95; + } + return -1; + case 11: + if ((active0 & 0x608000L) != 0L) + { + jjmatchedKind = 65; + jjmatchedPos = 11; + return 95; + } + return -1; + case 12: + if ((active0 & 0x608000L) != 0L) + { + jjmatchedKind = 65; + jjmatchedPos = 12; + return 95; + } + return -1; + case 13: + if ((active0 & 0x408000L) != 0L) + { + jjmatchedKind = 65; + jjmatchedPos = 13; + return 95; + } + if ((active0 & 0x200000L) != 0L) + return 95; + return -1; + case 14: + if ((active0 & 0x408000L) != 0L) + { + jjmatchedKind = 65; + jjmatchedPos = 14; + return 95; + } + return -1; + case 15: + if ((active0 & 0x408000L) != 0L) + { + jjmatchedKind = 65; + jjmatchedPos = 15; + return 95; + } + return -1; + default : + return -1; + } +} +private final int jjStartNfa_0(int pos, long active0) +{ + return jjMoveNfa_0(jjStopStringLiteralDfa_0(pos, active0), pos + 1); +} +private int jjStopAtPos(int pos, int kind) +{ + jjmatchedKind = kind; + jjmatchedPos = pos; + return pos + 1; +} +private int jjMoveStringLiteralDfa0_0() +{ + switch(curChar) + { + case 32: + return jjStartNfaWithStates_0(0, 1, 44); + case 33: + return jjMoveStringLiteralDfa1_0(0x80000000000L); + case 39: + return jjStartNfaWithStates_0(0, 41, 76); + case 40: + return jjStopAtPos(0, 53); + case 41: + return jjStopAtPos(0, 40); + 
case 43: + return jjStopAtPos(0, 48); + case 44: + return jjStopAtPos(0, 39); + case 46: + return jjMoveStringLiteralDfa1_0(0x8000000000000L); + case 47: + jjmatchedKind = 50; + return jjMoveStringLiteralDfa1_0(0x80L); + case 59: + return jjStopAtPos(0, 35); + case 61: + return jjStopAtPos(0, 42); + case 64: + return jjStartNfaWithStates_0(0, 52, 95); + case 67: + return jjMoveStringLiteralDfa1_0(0x10000L); + case 69: + return jjMoveStringLiteralDfa1_0(0x4000L); + case 82: + return jjMoveStringLiteralDfa1_0(0x20000L); + case 91: + return jjStopAtPos(0, 54); + case 93: + return jjStopAtPos(0, 36); + case 97: + return jjMoveStringLiteralDfa1_0(0x20000000L); + case 98: + return jjMoveStringLiteralDfa1_0(0x100000000L); + case 99: + return jjMoveStringLiteralDfa1_0(0x400080000L); + case 100: + return jjMoveStringLiteralDfa1_0(0x80700000L); + case 101: + return jjMoveStringLiteralDfa1_0(0x40003000L); + case 103: + return jjMoveStringLiteralDfa1_0(0x40000L); + case 105: + return jjMoveStringLiteralDfa1_0(0x8a00L); + case 108: + return jjMoveStringLiteralDfa1_0(0x200000000L); + case 112: + return jjMoveStringLiteralDfa1_0(0x800000L); + case 115: + return jjMoveStringLiteralDfa1_0(0xf000000L); + case 116: + return jjMoveStringLiteralDfa1_0(0x400L); + case 120: + return jjMoveStringLiteralDfa1_0(0x10000000L); + case 123: + return jjStopAtPos(0, 37); + case 124: + return jjStopAtPos(0, 49); + case 125: + return jjStopAtPos(0, 38); + default : + return jjMoveNfa_0(0, 0); + } +} +private int jjMoveStringLiteralDfa1_0(long active0) +{ + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { + jjStopStringLiteralDfa_0(0, active0); + return 1; + } + switch(curChar) + { + case 46: + if ((active0 & 0x8000000000000L) != 0L) + return jjStopAtPos(1, 51); + break; + case 47: + if ((active0 & 0x80L) != 0L) + return jjStartNfaWithStates_0(1, 7, 96); + break; + case 61: + if ((active0 & 0x80000000000L) != 0L) + return jjStopAtPos(1, 43); + break; + case 98: + return 
jjMoveStringLiteralDfa2_0(active0, 0x80000000L); + case 100: + return jjMoveStringLiteralDfa2_0(active0, 0x8000L); + case 101: + return jjMoveStringLiteralDfa2_0(active0, 0x1760000L); + case 102: + if ((active0 & 0x800L) != 0L) + return jjStartNfaWithStates_0(1, 11, 95); + break; + case 107: + return jjMoveStringLiteralDfa2_0(active0, 0x2000000L); + case 108: + return jjMoveStringLiteralDfa2_0(active0, 0x100001000L); + case 109: + return jjMoveStringLiteralDfa2_0(active0, 0x40000200L); + case 110: + return jjMoveStringLiteralDfa2_0(active0, 0x2000L); + case 111: + return jjMoveStringLiteralDfa2_0(active0, 0x600090000L); + case 112: + return jjMoveStringLiteralDfa2_0(active0, 0x34000000L); + case 114: + return jjMoveStringLiteralDfa2_0(active0, 0x800400L); + case 116: + return jjMoveStringLiteralDfa2_0(active0, 0x8000000L); + case 120: + return jjMoveStringLiteralDfa2_0(active0, 0x4000L); + default : + break; + } + return jjStartNfa_0(0, active0); +} +private int jjMoveStringLiteralDfa2_0(long old0, long active0) +{ + if (((active0 &= old0)) == 0L) + return jjStartNfa_0(0, old0); + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { + jjStopStringLiteralDfa_0(1, active0); + return 2; + } + switch(curChar) + { + case 97: + return jjMoveStringLiteralDfa3_0(active0, 0x118000400L); + case 99: + return jjMoveStringLiteralDfa3_0(active0, 0x700000L); + case 100: + if ((active0 & 0x2000L) != 0L) + return jjStartNfaWithStates_0(2, 13, 95); + break; + case 101: + return jjMoveStringLiteralDfa3_0(active0, 0x808000L); + case 103: + return jjMoveStringLiteralDfa3_0(active0, 0x20000L); + case 105: + return jjMoveStringLiteralDfa3_0(active0, 0x2000000L); + case 108: + return jjMoveStringLiteralDfa3_0(active0, 0x84000000L); + case 110: + return jjMoveStringLiteralDfa3_0(active0, 0x400010000L); + case 111: + return jjMoveStringLiteralDfa3_0(active0, 0x200000000L); + case 112: + return jjMoveStringLiteralDfa3_0(active0, 0x60080200L); + case 115: + return 
jjMoveStringLiteralDfa3_0(active0, 0x1000L); + case 116: + if ((active0 & 0x1000000L) != 0L) + return jjStartNfaWithStates_0(2, 24, 95); + return jjMoveStringLiteralDfa3_0(active0, 0x44000L); + default : + break; + } + return jjStartNfa_0(1, active0); +} +private int jjMoveStringLiteralDfa3_0(long old0, long active0) +{ + if (((active0 &= old0)) == 0L) + return jjStartNfa_0(1, old0); + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { + jjStopStringLiteralDfa_0(2, active0); + return 3; + } + switch(curChar) + { + case 69: + return jjMoveStringLiteralDfa4_0(active0, 0x20000L); + case 86: + return jjMoveStringLiteralDfa4_0(active0, 0x40000L); + case 99: + return jjMoveStringLiteralDfa4_0(active0, 0x500000000L); + case 101: + if ((active0 & 0x1000L) != 0L) + return jjStartNfaWithStates_0(3, 12, 95); + break; + case 105: + return jjMoveStringLiteralDfa4_0(active0, 0x4000000L); + case 107: + return jjMoveStringLiteralDfa4_0(active0, 0x200000000L); + case 108: + return jjMoveStringLiteralDfa4_0(active0, 0x20700000L); + case 110: + return jjMoveStringLiteralDfa4_0(active0, 0x8400L); + case 111: + return jjMoveStringLiteralDfa4_0(active0, 0x80000200L); + case 112: + return jjMoveStringLiteralDfa4_0(active0, 0x2800000L); + case 114: + return jjMoveStringLiteralDfa4_0(active0, 0x4000L); + case 116: + return jjMoveStringLiteralDfa4_0(active0, 0x58000000L); + case 118: + return jjMoveStringLiteralDfa4_0(active0, 0x10000L); + case 121: + if ((active0 & 0x80000L) != 0L) + return jjStartNfaWithStates_0(3, 19, 95); + break; + default : + break; + } + return jjStartNfa_0(2, active0); +} +private int jjMoveStringLiteralDfa4_0(long old0, long active0) +{ + if (((active0 &= old0)) == 0L) + return jjStartNfa_0(2, old0); + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { + jjStopStringLiteralDfa_0(3, active0); + return 4; + } + switch(curChar) + { + case 82: + return jjMoveStringLiteralDfa5_0(active0, 0x2000000L); + case 97: + return 
jjMoveStringLiteralDfa5_0(active0, 0x400744000L); + case 101: + return jjMoveStringLiteralDfa5_0(active0, 0x10000L); + case 104: + return jjMoveStringLiteralDfa5_0(active0, 0x10000000L); + case 105: + return jjMoveStringLiteralDfa5_0(active0, 0x8000000L); + case 107: + return jjMoveStringLiteralDfa5_0(active0, 0x100000000L); + case 111: + return jjMoveStringLiteralDfa5_0(active0, 0x80000000L); + case 114: + return jjMoveStringLiteralDfa5_0(active0, 0x800200L); + case 115: + if ((active0 & 0x400L) != 0L) + return jjStartNfaWithStates_0(4, 10, 95); + break; + case 116: + if ((active0 & 0x4000000L) != 0L) + return jjStartNfaWithStates_0(4, 26, 95); + return jjMoveStringLiteralDfa5_0(active0, 0x8000L); + case 117: + return jjMoveStringLiteralDfa5_0(active0, 0x200000000L); + case 120: + return jjMoveStringLiteralDfa5_0(active0, 0x20000L); + case 121: + if ((active0 & 0x20000000L) != 0L) + return jjStartNfaWithStates_0(4, 29, 95); + else if ((active0 & 0x40000000L) != 0L) + return jjStartNfaWithStates_0(4, 30, 95); + break; + default : + break; + } + return jjStartNfa_0(3, active0); +} +private int jjMoveStringLiteralDfa5_0(long old0, long active0) +{ + if (((active0 &= old0)) == 0L) + return jjStartNfa_0(3, old0); + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { + jjStopStringLiteralDfa_0(4, active0); + return 5; + } + switch(curChar) + { + case 58: + if ((active0 & 0x10000000L) != 0L) + return jjStartNfaWithStates_0(5, 28, 13); + break; + case 99: + if ((active0 & 0x8000000L) != 0L) + return jjStartNfaWithStates_0(5, 27, 95); + return jjMoveStringLiteralDfa6_0(active0, 0x4000L); + case 101: + return jjMoveStringLiteralDfa6_0(active0, 0x2000000L); + case 105: + return jjMoveStringLiteralDfa6_0(active0, 0x8000L); + case 107: + return jjMoveStringLiteralDfa6_0(active0, 0x80000000L); + case 108: + return jjMoveStringLiteralDfa6_0(active0, 0x100040000L); + case 111: + return jjMoveStringLiteralDfa6_0(active0, 0x800000L); + case 112: + if 
((active0 & 0x200000000L) != 0L) + return jjStartNfaWithStates_0(5, 33, 95); + return jjMoveStringLiteralDfa6_0(active0, 0x20000L); + case 114: + return jjMoveStringLiteralDfa6_0(active0, 0x710000L); + case 116: + if ((active0 & 0x200L) != 0L) + return jjStartNfaWithStates_0(5, 9, 95); + else if ((active0 & 0x400000000L) != 0L) + return jjStartNfaWithStates_0(5, 34, 95); + break; + default : + break; + } + return jjStartNfa_0(4, active0); +} +private int jjMoveStringLiteralDfa6_0(long old0, long active0) +{ + if (((active0 &= old0)) == 0L) + return jjStartNfa_0(4, old0); + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { + jjStopStringLiteralDfa_0(5, active0); + return 6; + } + switch(curChar) + { + case 99: + return jjMoveStringLiteralDfa7_0(active0, 0x2800000L); + case 101: + return jjMoveStringLiteralDfa7_0(active0, 0x700000L); + case 102: + return jjMoveStringLiteralDfa7_0(active0, 0x8000L); + case 105: + return jjMoveStringLiteralDfa7_0(active0, 0x100000000L); + case 114: + if ((active0 & 0x20000L) != 0L) + return jjStartNfaWithStates_0(6, 17, 95); + break; + case 116: + if ((active0 & 0x4000L) != 0L) + return jjStartNfaWithStates_0(6, 14, 95); + else if ((active0 & 0x10000L) != 0L) + return jjStartNfaWithStates_0(6, 16, 95); + break; + case 117: + return jjMoveStringLiteralDfa7_0(active0, 0x80040000L); + default : + break; + } + return jjStartNfa_0(5, active0); +} +private int jjMoveStringLiteralDfa7_0(long old0, long active0) +{ + if (((active0 &= old0)) == 0L) + return jjStartNfa_0(5, old0); + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { + jjStopStringLiteralDfa_0(6, active0); + return 7; + } + switch(curChar) + { + case 95: + return jjMoveStringLiteralDfa8_0(active0, 0x700000L); + case 101: + if ((active0 & 0x40000L) != 0L) + return jjStartNfaWithStates_0(7, 18, 95); + return jjMoveStringLiteralDfa8_0(active0, 0x800000L); + case 105: + return jjMoveStringLiteralDfa8_0(active0, 0x8000L); + case 111: 
+ return jjMoveStringLiteralDfa8_0(active0, 0x2000000L); + case 112: + if ((active0 & 0x80000000L) != 0L) + return jjStartNfaWithStates_0(7, 31, 95); + break; + case 115: + return jjMoveStringLiteralDfa8_0(active0, 0x100000000L); + default : + break; + } + return jjStartNfa_0(6, active0); +} +private int jjMoveStringLiteralDfa8_0(long old0, long active0) +{ + if (((active0 &= old0)) == 0L) + return jjStartNfa_0(6, old0); + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { + jjStopStringLiteralDfa_0(7, active0); + return 8; + } + switch(curChar) + { + case 101: + return jjMoveStringLiteralDfa9_0(active0, 0x8000L); + case 110: + return jjMoveStringLiteralDfa9_0(active0, 0x100000L); + case 114: + return jjMoveStringLiteralDfa9_0(active0, 0x2000000L); + case 115: + return jjMoveStringLiteralDfa9_0(active0, 0xe00000L); + case 116: + if ((active0 & 0x100000000L) != 0L) + return jjStartNfaWithStates_0(8, 32, 95); + break; + default : + break; + } + return jjStartNfa_0(7, active0); +} +private int jjMoveStringLiteralDfa9_0(long old0, long active0) +{ + if (((active0 &= old0)) == 0L) + return jjStartNfa_0(7, old0); + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { + jjStopStringLiteralDfa_0(8, active0); + return 9; + } + switch(curChar) + { + case 99: + return jjMoveStringLiteralDfa10_0(active0, 0x200000L); + case 100: + if ((active0 & 0x2000000L) != 0L) + return jjStartNfaWithStates_0(9, 25, 95); + break; + case 114: + return jjMoveStringLiteralDfa10_0(active0, 0x8000L); + case 115: + if ((active0 & 0x100000L) != 0L) + return jjStartNfaWithStates_0(9, 20, 95); + else if ((active0 & 0x800000L) != 0L) + return jjStartNfaWithStates_0(9, 23, 95); + break; + case 117: + return jjMoveStringLiteralDfa10_0(active0, 0x400000L); + default : + break; + } + return jjStartNfa_0(8, active0); +} +private int jjMoveStringLiteralDfa10_0(long old0, long active0) +{ + if (((active0 &= old0)) == 0L) + return jjStartNfa_0(8, old0); + try { 
curChar = input_stream.readChar(); } + catch(java.io.IOException e) { + jjStopStringLiteralDfa_0(9, active0); + return 10; + } + switch(curChar) + { + case 69: + return jjMoveStringLiteralDfa11_0(active0, 0x8000L); + case 98: + return jjMoveStringLiteralDfa11_0(active0, 0x400000L); + case 114: + return jjMoveStringLiteralDfa11_0(active0, 0x200000L); + default : + break; + } + return jjStartNfa_0(9, active0); +} +private int jjMoveStringLiteralDfa11_0(long old0, long active0) +{ + if (((active0 &= old0)) == 0L) + return jjStartNfa_0(9, old0); + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { + jjStopStringLiteralDfa_0(10, active0); + return 11; + } + switch(curChar) + { + case 105: + return jjMoveStringLiteralDfa12_0(active0, 0x200000L); + case 115: + return jjMoveStringLiteralDfa12_0(active0, 0x400000L); + case 120: + return jjMoveStringLiteralDfa12_0(active0, 0x8000L); + default : + break; + } + return jjStartNfa_0(10, active0); +} +private int jjMoveStringLiteralDfa12_0(long old0, long active0) +{ + if (((active0 &= old0)) == 0L) + return jjStartNfa_0(10, old0); + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { + jjStopStringLiteralDfa_0(11, active0); + return 12; + } + switch(curChar) + { + case 99: + return jjMoveStringLiteralDfa13_0(active0, 0x400000L); + case 112: + return jjMoveStringLiteralDfa13_0(active0, 0x200000L); + case 116: + return jjMoveStringLiteralDfa13_0(active0, 0x8000L); + default : + break; + } + return jjStartNfa_0(11, active0); +} +private int jjMoveStringLiteralDfa13_0(long old0, long active0) +{ + if (((active0 &= old0)) == 0L) + return jjStartNfa_0(11, old0); + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { + jjStopStringLiteralDfa_0(12, active0); + return 13; + } + switch(curChar) + { + case 114: + return jjMoveStringLiteralDfa14_0(active0, 0x408000L); + case 116: + if ((active0 & 0x200000L) != 0L) + return jjStartNfaWithStates_0(13, 21, 95); + break; + 
default : + break; + } + return jjStartNfa_0(12, active0); +} +private int jjMoveStringLiteralDfa14_0(long old0, long active0) +{ + if (((active0 &= old0)) == 0L) + return jjStartNfa_0(12, old0); + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { + jjStopStringLiteralDfa_0(13, active0); + return 14; + } + switch(curChar) + { + case 97: + return jjMoveStringLiteralDfa15_0(active0, 0x8000L); + case 105: + return jjMoveStringLiteralDfa15_0(active0, 0x400000L); + default : + break; + } + return jjStartNfa_0(13, active0); +} +private int jjMoveStringLiteralDfa15_0(long old0, long active0) +{ + if (((active0 &= old0)) == 0L) + return jjStartNfa_0(13, old0); + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { + jjStopStringLiteralDfa_0(14, active0); + return 15; + } + switch(curChar) + { + case 99: + return jjMoveStringLiteralDfa16_0(active0, 0x8000L); + case 112: + return jjMoveStringLiteralDfa16_0(active0, 0x400000L); + default : + break; + } + return jjStartNfa_0(14, active0); +} +private int jjMoveStringLiteralDfa16_0(long old0, long active0) +{ + if (((active0 &= old0)) == 0L) + return jjStartNfa_0(14, old0); + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { + jjStopStringLiteralDfa_0(15, active0); + return 16; + } + switch(curChar) + { + case 116: + if ((active0 & 0x8000L) != 0L) + return jjStartNfaWithStates_0(16, 15, 95); + else if ((active0 & 0x400000L) != 0L) + return jjStartNfaWithStates_0(16, 22, 95); + break; + default : + break; + } + return jjStartNfa_0(15, active0); +} +private int jjStartNfaWithStates_0(int pos, int kind, int state) +{ + jjmatchedKind = kind; + jjmatchedPos = pos; + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { return pos + 1; } + return jjMoveNfa_0(state, pos + 1); +} +static final long[] jjbitVec0 = { + 0x0L, 0x0L, 0xffffffffffffffffL, 0xffffffffffffffffL +}; +private int jjMoveNfa_0(int startState, int curPos) +{ + int 
startsAt = 0; + jjnewStateCnt = 95; + int i = 1; + jjstateSet[0] = startState; + int kind = 0x7fffffff; + for (;;) + { + if (++jjround == 0x7fffffff) + ReInitRounds(); + if (curChar < 64) + { + long l = 1L << curChar; + do + { + switch(jjstateSet[--i]) + { + case 13: + if ((0x7ff600000000000L & l) != 0L) + { + if (kind > 65) + kind = 65; + jjCheckNAdd(94); + } + else if (curChar == 47) + jjstateSet[jjnewStateCnt++] = 86; + else if (curChar == 34) + jjCheckNAdd(14); + if ((0x7ff600000000000L & l) != 0L) + jjCheckNAddTwoStates(85, 93); + if (curChar == 58) + jjstateSet[jjnewStateCnt++] = 92; + break; + case 17: + if ((0x7ff600000000000L & l) != 0L) + { + if (kind > 65) + kind = 65; + jjCheckNAdd(94); + } + if ((0x7ff600000000000L & l) != 0L) + jjCheckNAddTwoStates(85, 93); + if (curChar == 58) + jjstateSet[jjnewStateCnt++] = 92; + break; + case 16: + if ((0x7ff600000000000L & l) != 0L) + { + if (kind > 65) + kind = 65; + jjCheckNAdd(94); + } + if ((0x7ff600000000000L & l) != 0L) + jjCheckNAddTwoStates(85, 93); + if (curChar == 58) + jjstateSet[jjnewStateCnt++] = 92; + break; + case 28: + if (curChar == 42) + jjCheckNAddTwoStates(34, 35); + else if (curChar == 47) + jjCheckNAddStates(0, 2); + break; + case 18: + if ((0x7ff600000000000L & l) != 0L) + { + if (kind > 65) + kind = 65; + jjCheckNAdd(94); + } + if ((0x7ff600000000000L & l) != 0L) + jjCheckNAddTwoStates(85, 93); + if (curChar == 58) + jjstateSet[jjnewStateCnt++] = 92; + break; + case 44: + if (curChar == 38) + jjstateSet[jjnewStateCnt++] = 60; + if (curChar == 38) + jjstateSet[jjnewStateCnt++] = 54; + if (curChar == 38) + jjstateSet[jjnewStateCnt++] = 48; + if (curChar == 38) + jjstateSet[jjnewStateCnt++] = 43; + break; + case 12: + if ((0x7ff600000000000L & l) != 0L) + { + if (kind > 65) + kind = 65; + jjCheckNAdd(94); + } + if ((0x7ff600000000000L & l) != 0L) + jjCheckNAddTwoStates(85, 93); + if (curChar == 58) + jjstateSet[jjnewStateCnt++] = 92; + if (curChar == 58) + jjstateSet[jjnewStateCnt++] = 13; + 
break; + case 19: + if ((0x7ff600000000000L & l) != 0L) + { + if (kind > 65) + kind = 65; + jjCheckNAdd(94); + } + if ((0x7ff600000000000L & l) != 0L) + jjCheckNAddTwoStates(85, 93); + if (curChar == 58) + jjstateSet[jjnewStateCnt++] = 92; + break; + case 95: + if ((0x7ff600000000000L & l) != 0L) + { + if (kind > 65) + kind = 65; + jjCheckNAdd(94); + } + if ((0x7ff600000000000L & l) != 0L) + jjCheckNAddTwoStates(85, 93); + if (curChar == 58) + jjstateSet[jjnewStateCnt++] = 92; + break; + case 96: + if ((0xffffffffffffdbffL & l) != 0L) + jjCheckNAddStates(0, 2); + else if ((0x2400L & l) != 0L) + { + if (kind > 5) + kind = 5; + } + if (curChar == 13) + jjstateSet[jjnewStateCnt++] = 31; + break; + case 0: + if ((0x3ff000000000000L & l) != 0L) + { + if (kind > 65) + kind = 65; + jjCheckNAddStates(3, 5); + } + else if (curChar == 39) + jjCheckNAddStates(6, 8); + else if (curChar == 34) + jjCheckNAddStates(9, 12); + else if (curChar == 32) + jjAddStates(13, 16); + else if (curChar == 47) + jjAddStates(17, 18); + else if (curChar == 36) + jjstateSet[jjnewStateCnt++] = 25; + else if (curChar == 37) + jjstateSet[jjnewStateCnt++] = 10; + else if (curChar == 60) + jjstateSet[jjnewStateCnt++] = 4; + else if (curChar == 62) + jjstateSet[jjnewStateCnt++] = 2; + if (curChar == 36) + jjstateSet[jjnewStateCnt++] = 7; + else if (curChar == 60) + { + if (kind > 45) + kind = 45; + } + else if (curChar == 62) + { + if (kind > 44) + kind = 44; + } + break; + case 76: + if ((0xffffff7fffffffffL & l) != 0L) + jjCheckNAddTwoStates(74, 75); + if ((0xffffff7fffffdbffL & l) != 0L) + jjCheckNAdd(77); + break; + case 1: + if (curChar == 60 && kind > 45) + kind = 45; + break; + case 2: + if (curChar == 61 && kind > 46) + kind = 46; + break; + case 3: + if (curChar == 62) + jjstateSet[jjnewStateCnt++] = 2; + break; + case 4: + if (curChar == 61 && kind > 47) + kind = 47; + break; + case 5: + if (curChar == 60) + jjstateSet[jjnewStateCnt++] = 4; + break; + case 6: + if (curChar == 36) + 
jjstateSet[jjnewStateCnt++] = 7; + break; + case 7: + if ((0x3ff000000000000L & l) == 0L) + break; + if (kind > 55) + kind = 55; + jjCheckNAdd(8); + break; + case 8: + if ((0x7ff600000000000L & l) == 0L) + break; + if (kind > 55) + kind = 55; + jjCheckNAdd(8); + break; + case 9: + if (curChar == 37) + jjstateSet[jjnewStateCnt++] = 10; + break; + case 10: + if ((0x3ff000000000000L & l) == 0L) + break; + if (kind > 56) + kind = 56; + jjCheckNAdd(11); + break; + case 11: + if ((0x7ff600000000000L & l) == 0L) + break; + if (kind > 56) + kind = 56; + jjCheckNAdd(11); + break; + case 14: + if ((0xfffffffbffffffffL & l) != 0L) + jjCheckNAddTwoStates(14, 15); + break; + case 15: + if (curChar == 34 && kind > 57) + kind = 57; + break; + case 21: + if (curChar == 46) + jjCheckNAdd(22); + break; + case 22: + if ((0x3ff000000000000L & l) == 0L) + break; + if (kind > 58) + kind = 58; + jjCheckNAdd(22); + break; + case 26: + if (curChar == 36) + jjstateSet[jjnewStateCnt++] = 25; + break; + case 27: + if (curChar == 47) + jjAddStates(17, 18); + break; + case 29: + if ((0xffffffffffffdbffL & l) != 0L) + jjCheckNAddStates(0, 2); + break; + case 30: + if ((0x2400L & l) != 0L && kind > 5) + kind = 5; + break; + case 31: + if (curChar == 10 && kind > 5) + kind = 5; + break; + case 32: + if (curChar == 13) + jjstateSet[jjnewStateCnt++] = 31; + break; + case 33: + if (curChar == 42) + jjCheckNAddTwoStates(34, 35); + break; + case 34: + if ((0xfffffbffffffffffL & l) != 0L) + jjCheckNAddTwoStates(34, 35); + break; + case 35: + if (curChar == 42) + jjAddStates(19, 20); + break; + case 36: + if ((0xffff7fffffffffffL & l) != 0L) + jjCheckNAddTwoStates(37, 35); + break; + case 37: + if ((0xfffffbffffffffffL & l) != 0L) + jjCheckNAddTwoStates(37, 35); + break; + case 38: + if (curChar == 47 && kind > 6) + kind = 6; + break; + case 39: + if (curChar == 32) + jjAddStates(13, 16); + break; + case 40: + if (curChar == 32 && kind > 44) + kind = 44; + break; + case 41: + if (curChar == 59) + 
jjstateSet[jjnewStateCnt++] = 40; + break; + case 45: + if (curChar == 32 && kind > 45) + kind = 45; + break; + case 46: + if (curChar == 59) + jjstateSet[jjnewStateCnt++] = 45; + break; + case 49: + if (curChar == 38) + jjstateSet[jjnewStateCnt++] = 48; + break; + case 50: + if (curChar == 32 && kind > 46) + kind = 46; + break; + case 51: + if (curChar == 61) + jjstateSet[jjnewStateCnt++] = 50; + break; + case 52: + if (curChar == 59) + jjstateSet[jjnewStateCnt++] = 51; + break; + case 55: + if (curChar == 38) + jjstateSet[jjnewStateCnt++] = 54; + break; + case 56: + if (curChar == 32 && kind > 47) + kind = 47; + break; + case 57: + if (curChar == 61) + jjstateSet[jjnewStateCnt++] = 56; + break; + case 58: + if (curChar == 59) + jjstateSet[jjnewStateCnt++] = 57; + break; + case 61: + if (curChar == 38) + jjstateSet[jjnewStateCnt++] = 60; + break; + case 62: + if (curChar == 34) + jjCheckNAddStates(9, 12); + break; + case 63: + if ((0xfffffffbffffffffL & l) != 0L) + jjCheckNAddTwoStates(63, 64); + break; + case 64: + if (curChar == 34 && kind > 60) + kind = 60; + break; + case 65: + if ((0xfffffffbffffdbffL & l) != 0L) + jjCheckNAddStates(21, 23); + break; + case 67: + if ((0x8400000000L & l) != 0L) + jjCheckNAddStates(21, 23); + break; + case 68: + if (curChar == 34 && kind > 63) + kind = 63; + break; + case 69: + if ((0xff000000000000L & l) != 0L) + jjCheckNAddStates(24, 27); + break; + case 70: + if ((0xff000000000000L & l) != 0L) + jjCheckNAddStates(21, 23); + break; + case 71: + if ((0xf000000000000L & l) != 0L) + jjstateSet[jjnewStateCnt++] = 72; + break; + case 72: + if ((0xff000000000000L & l) != 0L) + jjCheckNAdd(70); + break; + case 73: + if (curChar == 39) + jjCheckNAddStates(6, 8); + break; + case 74: + if ((0xffffff7fffffffffL & l) != 0L) + jjCheckNAddTwoStates(74, 75); + break; + case 75: + if (curChar == 39 && kind > 61) + kind = 61; + break; + case 77: + if (curChar == 39 && kind > 62) + kind = 62; + break; + case 79: + if ((0x8400000000L & l) != 
0L) + jjCheckNAdd(77); + break; + case 80: + if ((0xff000000000000L & l) != 0L) + jjCheckNAddTwoStates(81, 77); + break; + case 81: + if ((0xff000000000000L & l) != 0L) + jjCheckNAdd(77); + break; + case 82: + if ((0xf000000000000L & l) != 0L) + jjstateSet[jjnewStateCnt++] = 83; + break; + case 83: + if ((0xff000000000000L & l) != 0L) + jjCheckNAdd(81); + break; + case 84: + if ((0x3ff000000000000L & l) == 0L) + break; + if (kind > 65) + kind = 65; + jjCheckNAddStates(3, 5); + break; + case 85: + if ((0x7ff600000000000L & l) != 0L) + jjCheckNAddTwoStates(85, 93); + break; + case 86: + if (curChar == 47) + jjstateSet[jjnewStateCnt++] = 87; + break; + case 87: + if ((0x3ff000000000000L & l) == 0L) + break; + if (kind > 64) + kind = 64; + jjCheckNAddTwoStates(88, 89); + break; + case 88: + if ((0x7ff600000000000L & l) == 0L) + break; + if (kind > 64) + kind = 64; + jjCheckNAddTwoStates(88, 89); + break; + case 89: + if (curChar != 47) + break; + if (kind > 64) + kind = 64; + jjstateSet[jjnewStateCnt++] = 90; + break; + case 90: + if ((0x3ff000000000000L & l) == 0L) + break; + if (kind > 64) + kind = 64; + jjCheckNAddTwoStates(91, 89); + break; + case 91: + if ((0x7ff600000000000L & l) == 0L) + break; + if (kind > 64) + kind = 64; + jjCheckNAddTwoStates(91, 89); + break; + case 92: + if (curChar == 47) + jjstateSet[jjnewStateCnt++] = 86; + break; + case 93: + if (curChar == 58) + jjstateSet[jjnewStateCnt++] = 92; + break; + case 94: + if ((0x7ff600000000000L & l) == 0L) + break; + if (kind > 65) + kind = 65; + jjCheckNAdd(94); + break; + default : break; + } + } while(i != startsAt); + } + else if (curChar < 128) + { + long l = 1L << (curChar & 077); + do + { + switch(jjstateSet[--i]) + { + case 13: + if ((0x7fffffe87ffffffL & l) != 0L) + { + if (kind > 65) + kind = 65; + jjCheckNAdd(94); + } + if ((0x7fffffe87ffffffL & l) != 0L) + jjCheckNAddTwoStates(85, 93); + break; + case 17: + if ((0x7fffffe87ffffffL & l) != 0L) + { + if (kind > 65) + kind = 65; + 
jjCheckNAdd(94); + } + if ((0x7fffffe87ffffffL & l) != 0L) + jjCheckNAddTwoStates(85, 93); + if (curChar == 116) + jjstateSet[jjnewStateCnt++] = 16; + break; + case 16: + if ((0x7fffffe87ffffffL & l) != 0L) + { + if (kind > 65) + kind = 65; + jjCheckNAdd(94); + } + if ((0x7fffffe87ffffffL & l) != 0L) + jjCheckNAddTwoStates(85, 93); + if (curChar == 104) + jjstateSet[jjnewStateCnt++] = 12; + break; + case 18: + if ((0x7fffffe87ffffffL & l) != 0L) + { + if (kind > 65) + kind = 65; + jjCheckNAdd(94); + } + if ((0x7fffffe87ffffffL & l) != 0L) + jjCheckNAddTwoStates(85, 93); + if (curChar == 97) + jjstateSet[jjnewStateCnt++] = 17; + break; + case 12: + if ((0x7fffffe87ffffffL & l) != 0L) + { + if (kind > 65) + kind = 65; + jjCheckNAdd(94); + } + if ((0x7fffffe87ffffffL & l) != 0L) + jjCheckNAddTwoStates(85, 93); + break; + case 19: + if ((0x7fffffe87ffffffL & l) != 0L) + { + if (kind > 65) + kind = 65; + jjCheckNAdd(94); + } + if ((0x7fffffe87ffffffL & l) != 0L) + jjCheckNAddTwoStates(85, 93); + if (curChar == 112) + jjstateSet[jjnewStateCnt++] = 18; + break; + case 95: + if ((0x7fffffe87ffffffL & l) != 0L) + { + if (kind > 65) + kind = 65; + jjCheckNAdd(94); + } + if ((0x7fffffe87ffffffL & l) != 0L) + jjCheckNAddTwoStates(85, 93); + break; + case 96: + case 29: + jjCheckNAddStates(0, 2); + break; + case 0: + if ((0x7fffffe87ffffffL & l) != 0L) + { + if (kind > 65) + kind = 65; + jjCheckNAddStates(3, 5); + } + if (curChar == 120) + jjstateSet[jjnewStateCnt++] = 19; + break; + case 76: + jjCheckNAddTwoStates(74, 75); + if ((0xffffffffefffffffL & l) != 0L) + jjCheckNAdd(77); + else if (curChar == 92) + jjAddStates(28, 30); + break; + case 7: + case 8: + if ((0x7fffffe87ffffffL & l) == 0L) + break; + if (kind > 55) + kind = 55; + jjCheckNAdd(8); + break; + case 10: + case 11: + if ((0x7fffffe87ffffffL & l) == 0L) + break; + if (kind > 56) + kind = 56; + jjCheckNAdd(11); + break; + case 14: + jjAddStates(31, 32); + break; + case 20: + if (curChar == 120) + 
jjstateSet[jjnewStateCnt++] = 19; + break; + case 22: + if ((0x7fffffe87fffffeL & l) == 0L) + break; + if (kind > 58) + kind = 58; + jjstateSet[jjnewStateCnt++] = 22; + break; + case 23: + if (curChar == 98) + jjstateSet[jjnewStateCnt++] = 21; + break; + case 24: + if (curChar == 111) + jjstateSet[jjnewStateCnt++] = 23; + break; + case 25: + if (curChar == 106) + jjstateSet[jjnewStateCnt++] = 24; + break; + case 34: + jjCheckNAddTwoStates(34, 35); + break; + case 36: + case 37: + jjCheckNAddTwoStates(37, 35); + break; + case 42: + if (curChar == 116) + jjstateSet[jjnewStateCnt++] = 41; + break; + case 43: + if (curChar == 103) + jjstateSet[jjnewStateCnt++] = 42; + break; + case 47: + if (curChar == 116) + jjstateSet[jjnewStateCnt++] = 46; + break; + case 48: + if (curChar == 108) + jjstateSet[jjnewStateCnt++] = 47; + break; + case 53: + if (curChar == 116) + jjstateSet[jjnewStateCnt++] = 52; + break; + case 54: + if (curChar == 103) + jjstateSet[jjnewStateCnt++] = 53; + break; + case 59: + if (curChar == 116) + jjstateSet[jjnewStateCnt++] = 58; + break; + case 60: + if (curChar == 108) + jjstateSet[jjnewStateCnt++] = 59; + break; + case 63: + jjAddStates(33, 34); + break; + case 65: + if ((0xffffffffefffffffL & l) != 0L) + jjCheckNAddStates(21, 23); + break; + case 66: + if (curChar == 92) + jjAddStates(35, 37); + break; + case 67: + if ((0x14404410000000L & l) != 0L) + jjCheckNAddStates(21, 23); + break; + case 74: + jjCheckNAddTwoStates(74, 75); + break; + case 78: + if (curChar == 92) + jjAddStates(28, 30); + break; + case 79: + if ((0x14404410000000L & l) != 0L) + jjCheckNAdd(77); + break; + case 84: + if ((0x7fffffe87ffffffL & l) == 0L) + break; + if (kind > 65) + kind = 65; + jjCheckNAddStates(3, 5); + break; + case 85: + if ((0x7fffffe87ffffffL & l) != 0L) + jjCheckNAddTwoStates(85, 93); + break; + case 87: + case 88: + if ((0x7fffffe87ffffffL & l) == 0L) + break; + if (kind > 64) + kind = 64; + jjCheckNAddTwoStates(88, 89); + break; + case 90: + case 91: + 
if ((0x7fffffe87ffffffL & l) == 0L) + break; + if (kind > 64) + kind = 64; + jjCheckNAddTwoStates(91, 89); + break; + case 94: + if ((0x7fffffe87ffffffL & l) == 0L) + break; + if (kind > 65) + kind = 65; + jjCheckNAdd(94); + break; + default : break; + } + } while(i != startsAt); + } + else + { + int i2 = (curChar & 0xff) >> 6; + long l2 = 1L << (curChar & 077); + do + { + switch(jjstateSet[--i]) + { + case 96: + case 29: + if ((jjbitVec0[i2] & l2) != 0L) + jjCheckNAddStates(0, 2); + break; + case 76: + if ((jjbitVec0[i2] & l2) != 0L) + jjCheckNAddTwoStates(74, 75); + if ((jjbitVec0[i2] & l2) != 0L) + jjstateSet[jjnewStateCnt++] = 77; + break; + case 14: + if ((jjbitVec0[i2] & l2) != 0L) + jjAddStates(31, 32); + break; + case 34: + if ((jjbitVec0[i2] & l2) != 0L) + jjCheckNAddTwoStates(34, 35); + break; + case 36: + case 37: + if ((jjbitVec0[i2] & l2) != 0L) + jjCheckNAddTwoStates(37, 35); + break; + case 63: + if ((jjbitVec0[i2] & l2) != 0L) + jjAddStates(33, 34); + break; + case 65: + if ((jjbitVec0[i2] & l2) != 0L) + jjAddStates(21, 23); + break; + case 74: + if ((jjbitVec0[i2] & l2) != 0L) + jjCheckNAddTwoStates(74, 75); + break; + default : break; + } + } while(i != startsAt); + } + if (kind != 0x7fffffff) + { + jjmatchedKind = kind; + jjmatchedPos = curPos; + kind = 0x7fffffff; + } + ++curPos; + if ((i = jjnewStateCnt) == (startsAt = 95 - (jjnewStateCnt = startsAt))) + return curPos; + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { return curPos; } + } +} +private int jjMoveStringLiteralDfa0_1() +{ + return jjMoveNfa_1(0, 0); +} +private int jjMoveNfa_1(int startState, int curPos) +{ + int startsAt = 0; + jjnewStateCnt = 3; + int i = 1; + jjstateSet[0] = startState; + int kind = 0x7fffffff; + for (;;) + { + if (++jjround == 0x7fffffff) + ReInitRounds(); + if (curChar < 64) + { + long l = 1L << curChar; + do + { + switch(jjstateSet[--i]) + { + case 0: + if ((0x2400L & l) != 0L) + { + if (kind > 8) + kind = 8; + } + if (curChar == 
13) + jjstateSet[jjnewStateCnt++] = 1; + break; + case 1: + if (curChar == 10 && kind > 8) + kind = 8; + break; + case 2: + if (curChar == 13) + jjstateSet[jjnewStateCnt++] = 1; + break; + default : break; + } + } while(i != startsAt); + } + else if (curChar < 128) + { + long l = 1L << (curChar & 077); + do + { + switch(jjstateSet[--i]) + { + default : break; + } + } while(i != startsAt); + } + else + { + int i2 = (curChar & 0xff) >> 6; + long l2 = 1L << (curChar & 077); + do + { + switch(jjstateSet[--i]) + { + default : break; + } + } while(i != startsAt); + } + if (kind != 0x7fffffff) + { + jjmatchedKind = kind; + jjmatchedPos = curPos; + kind = 0x7fffffff; + } + ++curPos; + if ((i = jjnewStateCnt) == (startsAt = 3 - (jjnewStateCnt = startsAt))) + return curPos; + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { return curPos; } + } +} +static final int[] jjnextStates = { + 29, 30, 32, 85, 93, 94, 74, 76, 78, 63, 65, 66, 68, 44, 49, 55, + 61, 28, 33, 36, 38, 65, 66, 68, 65, 66, 70, 68, 79, 80, 82, 14, + 15, 63, 64, 67, 69, 71, +}; + +/** Token literal values. 
*/ +public static final String[] jjstrLiteralImages = { +"", null, null, null, null, null, null, null, null, +"\151\155\160\157\162\164", "\164\162\141\156\163", "\151\146", "\145\154\163\145", "\145\156\144", +"\105\170\164\162\141\143\164", "\151\144\145\156\164\151\146\151\145\162\105\170\164\162\141\143\164", +"\103\157\156\166\145\162\164", "\122\145\147\105\170\160\162", "\147\145\164\126\141\154\165\145", +"\143\157\160\171", "\144\145\143\154\141\162\145\137\156\163", +"\144\145\143\154\141\162\145\137\163\143\162\151\160\164", "\144\145\143\154\141\162\145\137\163\165\142\163\143\162\151\160\164", +"\160\162\145\160\162\157\143\145\163\163", "\163\145\164", "\163\153\151\160\122\145\143\157\162\144", +"\163\160\154\151\164", "\163\164\141\164\151\143", "\170\160\141\164\150\72", "\141\160\160\154\171", +"\145\155\160\164\171", "\144\142\154\157\157\153\165\160", "\142\154\141\143\153\154\151\163\164", +"\154\157\157\153\165\160", "\143\157\156\143\141\164", "\73", "\135", "\173", "\175", "\54", "\51", +"\47", "\75", "\41\75", null, null, null, null, "\53", "\174", "\57", "\56\56", +"\100", "\50", "\133", null, null, null, null, null, null, null, null, null, null, +null, null, null, null, null, }; + +/** Lexer state names. */ +public static final String[] lexStateNames = { + "DEFAULT", + "IN_SINGLE_LINE_COMMENT", +}; + +/** Lex State array. 
*/ +public static final int[] jjnewLexState = { + -1, -1, -1, -1, -1, -1, -1, 1, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, +}; +static final long[] jjtoToken = { + 0xf7fffffffffffe01L, 0x3L, +}; +static final long[] jjtoSkip = { + 0x17eL, 0x0L, +}; +static final long[] jjtoSpecial = { + 0x100L, 0x0L, +}; +static final long[] jjtoMore = { + 0x80L, 0x0L, +}; +protected SimpleCharStream input_stream; +private final int[] jjrounds = new int[95]; +private final int[] jjstateSet = new int[190]; +private final StringBuilder jjimage = new StringBuilder(); +private StringBuilder image = jjimage; +private int jjimageLen; +private int lengthOfMatch; +protected char curChar; +/** Constructor. */ +public FtScriptTokenManager(SimpleCharStream stream){ + if (SimpleCharStream.staticFlag) + throw new Error("ERROR: Cannot use a static CharStream class with a non-static lexical analyzer."); + input_stream = stream; +} + +/** Constructor. */ +public FtScriptTokenManager(SimpleCharStream stream, int lexState){ + this(stream); + SwitchTo(lexState); +} + +/** Reinitialise parser. */ +public void ReInit(SimpleCharStream stream) +{ + jjmatchedPos = jjnewStateCnt = 0; + curLexState = defaultLexState; + input_stream = stream; + ReInitRounds(); +} +private void ReInitRounds() +{ + int i; + jjround = 0x80000001; + for (i = 95; i-- > 0;) + jjrounds[i] = 0x80000000; +} + +/** Reinitialise parser. */ +public void ReInit(SimpleCharStream stream, int lexState) +{ + ReInit(stream); + SwitchTo(lexState); +} + +/** Switch to specified lex state. */ +public void SwitchTo(int lexState) +{ + if (lexState >= 2 || lexState < 0) + throw new TokenMgrError("Error: Ignoring invalid lexical state : " + lexState + ". 
/**
 * Get the next Token.
 *
 * Scans the input from the current position and returns the next token that
 * is flagged in {@code jjtoToken}. Matches flagged in {@code jjtoSkip} are
 * discarded; those additionally flagged in {@code jjtoSpecial} are built and
 * linked into a chain that is attached to the returned token through its
 * {@code specialToken} field.
 *
 * @return the next regular token; a token of kind 0 when
 *         {@code input_stream.BeginToken()} throws an IOException
 * @throws TokenMgrError when no token kind matches at the current position
 */
public Token getNextToken()
{
  Token specialToken = null;
  Token matchedToken;
  int curPos = 0;

  EOFLoop :
  for (;;)
  {
   try
   {
      curChar = input_stream.BeginToken();
   }
   catch(java.io.IOException e)
   {
      // No further character could be read: hand back a kind-0 token that
      // still carries any special tokens collected so far.
      jjmatchedKind = 0;
      matchedToken = jjFillToken();
      matchedToken.specialToken = specialToken;
      return matchedToken;
   }
   // Reset the image accumulator for the new token.
   image = jjimage;
   image.setLength(0);
   jjimageLen = 0;

   for (;;)
   {
     switch(curLexState)
     {
       case 0:
         // Mask 0x2600 has bits 9, 10 and 13 set, so this loop steps over
         // tab, line feed and carriage return before matching starts.
         try { input_stream.backup(0);
            while (curChar <= 13 && (0x2600L & (1L << curChar)) != 0L)
               curChar = input_stream.BeginToken();
         }
         catch (java.io.IOException e1) { continue EOFLoop; }
         // 0x7fffffff is the "nothing matched yet" sentinel.
         jjmatchedKind = 0x7fffffff;
         jjmatchedPos = 0;
         curPos = jjMoveStringLiteralDfa0_0();
         break;
       case 1:
         jjmatchedKind = 0x7fffffff;
         jjmatchedPos = 0;
         curPos = jjMoveStringLiteralDfa0_1();
         break;
     }
     if (jjmatchedKind != 0x7fffffff)
     {
        // The scanner may have read past the end of the best match; give
        // the surplus characters back to the stream.
        if (jjmatchedPos + 1 < curPos)
           input_stream.backup(curPos - jjmatchedPos - 1);
        if ((jjtoToken[jjmatchedKind >> 6] & (1L << (jjmatchedKind & 077))) != 0L)
        {
           // Regular token: build it, attach pending special tokens, apply
           // any lexical-state transition registered for this kind, return.
           matchedToken = jjFillToken();
           matchedToken.specialToken = specialToken;
       if (jjnewLexState[jjmatchedKind] != -1)
         curLexState = jjnewLexState[jjmatchedKind];
           return matchedToken;
        }
        else if ((jjtoSkip[jjmatchedKind >> 6] & (1L << (jjmatchedKind & 077))) != 0L)
        {
           if ((jjtoSpecial[jjmatchedKind >> 6] & (1L << (jjmatchedKind & 077))) != 0L)
           {
              // Special token: keep it by appending to the chain.
              matchedToken = jjFillToken();
              if (specialToken == null)
                 specialToken = matchedToken;
              else
              {
                 matchedToken.specialToken = specialToken;
                 specialToken = (specialToken.next = matchedToken);
              }
              SkipLexicalActions(matchedToken);
           }
           else
              SkipLexicalActions(null);
         if (jjnewLexState[jjmatchedKind] != -1)
           curLexState = jjnewLexState[jjmatchedKind];
           // Skipped input never reaches the caller; start a fresh token.
           continue EOFLoop;
        }
        // Neither a token nor a skip (presumably a MORE kind, cf. jjtoMore
        // -- confirm against the grammar): remember how much has been
        // matched and keep scanning, possibly in a new lexical state.
        jjimageLen += jjmatchedPos + 1;
      if (jjnewLexState[jjmatchedKind] != -1)
        curLexState = jjnewLexState[jjmatchedKind];
        curPos = 0;
        jjmatchedKind = 0x7fffffff;
        try {
           curChar = input_stream.readChar();
           continue;
        }
        // Ignoring the exception lets control fall through to the error
        // report below.
        catch (java.io.IOException e1) { }
     }
     // Error path: collect position and context for the TokenMgrError.
     int error_line = input_stream.getEndLine();
     int error_column = input_stream.getEndColumn();
     String error_after = null;
     boolean EOFSeen = false;
     // Probe one character ahead to find out whether input is exhausted.
     try { input_stream.readChar(); input_stream.backup(1); }
     catch (java.io.IOException e1) {
        EOFSeen = true;
        error_after = curPos <= 1 ? "" : input_stream.GetImage();
        if (curChar == '\n' || curChar == '\r') {
           error_line++;
           error_column = 0;
        }
        else
           error_column++;
     }
     if (!EOFSeen) {
        input_stream.backup(1);
        error_after = curPos <= 1 ? "" : input_stream.GetImage();
     }
     throw new TokenMgrError(EOFSeen, curLexState, error_line, error_column, error_after, curChar, TokenMgrError.LEXICAL_ERROR);
   }
  }
}
/**
 * Numeric identifiers of the FtScript AST node kinds, together with the
 * table that maps each identifier to its display name.
 *
 * This file is produced by JavaCC; the identifier values double as indices
 * into {@link #jjtNodeName}, so both lists must stay aligned.
 */
public interface FtScriptTreeConstants {

  // Node kind identifiers (interface fields are implicitly public static final).
  int JJTSTART = 0;
  int JJTMYASSIGN = 1;
  int JJTVOID = 2;
  int JJTMYATTRIBUTE = 3;
  int JJTMYCONDITION = 4;
  int JJTMYEMPTY = 5;
  int JJTMYIMPORT = 6;
  int JJTMYNS = 7;
  int JJTMYPREPROCESS = 8;
  int JJTMYSET = 9;
  int JJTMYSKIP = 10;
  int JJTMYCOPY = 11;
  int JJTMYOP = 12;
  int JJTMYSCRIPT = 13;

  /** Display name of each node kind, indexed by the identifiers above. */
  String[] jjtNodeName = {
    "Start",
    "MyAssign",
    "void",
    "MyAttribute",
    "MyCondition",
    "MyEmpty",
    "MyImport",
    "MyNs",
    "MyPreprocess",
    "MySet",
    "MySkip",
    "MyCopy",
    "MyOp",
    "MyScript",
  };
}
FtScriptVisitor.java Version 5.0 */ +package eu.dnetlib.data.collective.transformation.rulelanguage.parser; + +public interface FtScriptVisitor +{ + public Object visit(SimpleNode node, Object data); + public Object visit(ASTStart node, Object data); + public Object visit(ASTMyAssign node, Object data); + public Object visit(ASTMyAttribute node, Object data); + public Object visit(ASTMyCondition node, Object data); + public Object visit(ASTMyEmpty node, Object data); + public Object visit(ASTMyImport node, Object data); + public Object visit(ASTMyNs node, Object data); + public Object visit(ASTMyPreprocess node, Object data); + public Object visit(ASTMySet node, Object data); + public Object visit(ASTMySkip node, Object data); + public Object visit(ASTMyCopy node, Object data); + public Object visit(ASTMyOp node, Object data); + public Object visit(ASTMyScript node, Object data); +} +/* JavaCC - OriginalChecksum=878b8e974e60d303d7be3d6ce91428ec (do not edit this line) */ diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/JJTFtScriptState.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/JJTFtScriptState.java new file mode 100644 index 0000000..7af9bc9 --- /dev/null +++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/JJTFtScriptState.java @@ -0,0 +1,123 @@ +/* Generated By:JavaCC: Do not edit this line. 
JJTFtScriptState.java Version 5.0 */ +package eu.dnetlib.data.collective.transformation.rulelanguage.parser; + +public class JJTFtScriptState { + private java.util.List nodes; + private java.util.List marks; + + private int sp; // number of nodes on stack + private int mk; // current mark + private boolean node_created; + + public JJTFtScriptState() { + nodes = new java.util.ArrayList(); + marks = new java.util.ArrayList(); + sp = 0; + mk = 0; + } + + /* Determines whether the current node was actually closed and + pushed. This should only be called in the final user action of a + node scope. */ + public boolean nodeCreated() { + return node_created; + } + + /* Call this to reinitialize the node stack. It is called + automatically by the parser's ReInit() method. */ + public void reset() { + nodes.clear(); + marks.clear(); + sp = 0; + mk = 0; + } + + /* Returns the root node of the AST. It only makes sense to call + this after a successful parse. */ + public Node rootNode() { + return nodes.get(0); + } + + /* Pushes a node on to the stack. */ + public void pushNode(Node n) { + nodes.add(n); + ++sp; + } + + /* Returns the node on the top of the stack, and remove it from the + stack. */ + public Node popNode() { + if (--sp < mk) { + mk = marks.remove(marks.size()-1); + } + return nodes.remove(nodes.size()-1); + } + + /* Returns the node currently on the top of the stack. */ + public Node peekNode() { + return nodes.get(nodes.size()-1); + } + + /* Returns the number of children on the stack in the current node + scope. */ + public int nodeArity() { + return sp - mk; + } + + + public void clearNodeScope(Node n) { + while (sp > mk) { + popNode(); + } + mk = marks.remove(marks.size()-1); + } + + + public void openNodeScope(Node n) { + marks.add(mk); + mk = sp; + n.jjtOpen(); + } + + + /* A definite node is constructed from a specified number of + children. That number of nodes are popped from the stack and + made the children of the definite node. 
Then the definite node + is pushed on to the stack. */ + public void closeNodeScope(Node n, int num) { + mk = marks.remove(marks.size()-1); + while (num-- > 0) { + Node c = popNode(); + c.jjtSetParent(n); + n.jjtAddChild(c, num); + } + n.jjtClose(); + pushNode(n); + node_created = true; + } + + + /* A conditional node is constructed if its condition is true. All + the nodes that have been pushed since the node was opened are + made children of the conditional node, which is then pushed + on to the stack. If the condition is false the node is not + constructed and they are left on the stack. */ + public void closeNodeScope(Node n, boolean condition) { + if (condition) { + int a = nodeArity(); + mk = marks.remove(marks.size()-1); + while (a-- > 0) { + Node c = popNode(); + c.jjtSetParent(n); + n.jjtAddChild(c, a); + } + n.jjtClose(); + pushNode(n); + node_created = true; + } else { + mk = marks.remove(marks.size()-1); + node_created = false; + } + } +} +/* JavaCC - OriginalChecksum=ecb473ab5135001f535b0946f205a35d (do not edit this line) */ diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/Node.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/Node.java new file mode 100644 index 0000000..ca90095 --- /dev/null +++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/Node.java @@ -0,0 +1,39 @@ +/* Generated By:JJTree: Do not edit this line. Node.java Version 4.3 */ +/* JavaCCOptions:MULTI=true,NODE_USES_PARSER=false,VISITOR=true,TRACK_TOKENS=false,NODE_PREFIX=AST,NODE_EXTENDS=,NODE_FACTORY=,SUPPORT_CLASS_VISIBILITY_PUBLIC=true */ +package eu.dnetlib.data.collective.transformation.rulelanguage.parser; + +/* All AST nodes must implement this interface. It provides basic + machinery for constructing the parent and child relationships + between nodes. 
*/ + +public +interface Node { + + /** This method is called after the node has been made the current + node. It indicates that child nodes can now be added to it. */ + public void jjtOpen(); + + /** This method is called after all the child nodes have been + added. */ + public void jjtClose(); + + /** This pair of methods are used to inform the node of its + parent. */ + public void jjtSetParent(Node n); + public Node jjtGetParent(); + + /** This method tells the node to add its argument to the node's + list of children. */ + public void jjtAddChild(Node n, int i); + + /** This method returns a child node. The children are numbered + from zero, left to right. */ + public Node jjtGetChild(int i); + + /** Return the number of children the node has. */ + public int jjtGetNumChildren(); + + /** Accept the visitor. **/ + public Object jjtAccept(FtScriptVisitor visitor, Object data); +} +/* JavaCC - OriginalChecksum=a824598ce2b865e1e8afb12061994660 (do not edit this line) */ diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ParseException.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ParseException.java new file mode 100644 index 0000000..a0cb54a --- /dev/null +++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ParseException.java @@ -0,0 +1,187 @@ +/* Generated By:JavaCC: Do not edit this line. ParseException.java Version 5.0 */ +/* JavaCCOptions:KEEP_LINE_COL=null */ +package eu.dnetlib.data.collective.transformation.rulelanguage.parser; + +/** + * This exception is thrown when parse errors are encountered. + * You can explicitly create objects of this exception type by + * calling the method generateParseException in the generated + * parser. + * + * You can modify this class to customize your error reporting + * mechanisms so long as you retain the public fields. 
+ */ +public class ParseException extends Exception { + + /** + * The version identifier for this Serializable class. + * Increment only if the serialized form of the + * class changes. + */ + private static final long serialVersionUID = 1L; + + /** + * This constructor is used by the method "generateParseException" + * in the generated parser. Calling this constructor generates + * a new object of this type with the fields "currentToken", + * "expectedTokenSequences", and "tokenImage" set. + */ + public ParseException(Token currentTokenVal, + int[][] expectedTokenSequencesVal, + String[] tokenImageVal + ) + { + super(initialise(currentTokenVal, expectedTokenSequencesVal, tokenImageVal)); + currentToken = currentTokenVal; + expectedTokenSequences = expectedTokenSequencesVal; + tokenImage = tokenImageVal; + } + + /** + * The following constructors are for use by you for whatever + * purpose you can think of. Constructing the exception in this + * manner makes the exception behave in the normal way - i.e., as + * documented in the class "Throwable". The fields "errorToken", + * "expectedTokenSequences", and "tokenImage" do not contain + * relevant information. The JavaCC generated code does not use + * these constructors. + */ + + public ParseException() { + super(); + } + + /** Constructor with message. */ + public ParseException(String message) { + super(message); + } + + + /** + * This is the last token that has been consumed successfully. If + * this object has been created due to a parse error, the token + * followng this token will (therefore) be the first error token. + */ + public Token currentToken; + + /** + * Each entry in this array is an array of integers. Each array + * of integers represents a sequence of tokens (by their ordinal + * values) that is expected at this point of the parse. 
+ */ + public int[][] expectedTokenSequences; + + /** + * This is a reference to the "tokenImage" array of the generated + * parser within which the parse error occurred. This array is + * defined in the generated ...Constants interface. + */ + public String[] tokenImage; + + /** + * It uses "currentToken" and "expectedTokenSequences" to generate a parse + * error message and returns it. If this object has been created + * due to a parse error, and you do not catch it (it gets thrown + * from the parser) the correct error message + * gets displayed. + */ + private static String initialise(Token currentToken, + int[][] expectedTokenSequences, + String[] tokenImage) { + String eol = System.getProperty("line.separator", "\n"); + StringBuffer expected = new StringBuffer(); + int maxSize = 0; + for (int i = 0; i < expectedTokenSequences.length; i++) { + if (maxSize < expectedTokenSequences[i].length) { + maxSize = expectedTokenSequences[i].length; + } + for (int j = 0; j < expectedTokenSequences[i].length; j++) { + expected.append(tokenImage[expectedTokenSequences[i][j]]).append(' '); + } + if (expectedTokenSequences[i][expectedTokenSequences[i].length - 1] != 0) { + expected.append("..."); + } + expected.append(eol).append(" "); + } + String retval = "Encountered \""; + Token tok = currentToken.next; + for (int i = 0; i < maxSize; i++) { + if (i != 0) retval += " "; + if (tok.kind == 0) { + retval += tokenImage[0]; + break; + } + retval += " " + tokenImage[tok.kind]; + retval += " \""; + retval += add_escapes(tok.image); + retval += " \""; + tok = tok.next; + } + retval += "\" at line " + currentToken.next.beginLine + ", column " + currentToken.next.beginColumn; + retval += "." + eol; + if (expectedTokenSequences.length == 1) { + retval += "Was expecting:" + eol + " "; + } else { + retval += "Was expecting one of:" + eol + " "; + } + retval += expected.toString(); + return retval; + } + + /** + * The end of line string for this machine. 
+ */ + protected String eol = System.getProperty("line.separator", "\n"); + + /** + * Used to convert raw characters to their escaped version + * when these raw version cannot be used as part of an ASCII + * string literal. + */ + static String add_escapes(String str) { + StringBuffer retval = new StringBuffer(); + char ch; + for (int i = 0; i < str.length(); i++) { + switch (str.charAt(i)) + { + case 0 : + continue; + case '\b': + retval.append("\\b"); + continue; + case '\t': + retval.append("\\t"); + continue; + case '\n': + retval.append("\\n"); + continue; + case '\f': + retval.append("\\f"); + continue; + case '\r': + retval.append("\\r"); + continue; + case '\"': + retval.append("\\\""); + continue; + case '\'': + retval.append("\\\'"); + continue; + case '\\': + retval.append("\\\\"); + continue; + default: + if ((ch = str.charAt(i)) < 0x20 || ch > 0x7e) { + String s = "0000" + Integer.toString(ch, 16); + retval.append("\\u" + s.substring(s.length() - 4, s.length())); + } else { + retval.append(ch); + } + continue; + } + } + return retval.toString(); + } + +} +/* JavaCC - OriginalChecksum=5442a73da71e919193d4dba049348359 (do not edit this line) */ diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/SimpleCharStream.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/SimpleCharStream.java new file mode 100644 index 0000000..fff86d3 --- /dev/null +++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/SimpleCharStream.java @@ -0,0 +1,471 @@ +/* Generated By:JavaCC: Do not edit this line. SimpleCharStream.java Version 5.0 */ +/* JavaCCOptions:STATIC=false,SUPPORT_CLASS_VISIBILITY_PUBLIC=true */ +package eu.dnetlib.data.collective.transformation.rulelanguage.parser; + +/** + * An implementation of interface CharStream, where the stream is assumed to + * contain only ASCII characters (without unicode processing). 
/**
 * Character stream used by the generated token manager.
 *
 * <p>Characters are pulled from a {@link java.io.Reader} into a growable
 * buffer. For every buffered character the line and column at which it was
 * read are recorded, so that the begin/end position of the current token
 * can be reported. The stream is assumed to contain only ASCII characters
 * (no unicode escape processing is performed).
 *
 * <p>The buffer logic (wrap-around, in-place reuse, growth in steps of 2048
 * slots) is the stock JavaCC implementation and is intentionally kept as
 * generated; only error reporting and documentation were touched.
 */
public class SimpleCharStream
{
  /** Whether parser is static. */
  public static final boolean staticFlag = false;

  /** Current capacity of {@link #buffer}, {@link #bufline} and {@link #bufcolumn}. */
  int bufsize;
  /** Index up to which {@link #FillBuff()} is currently allowed to write. */
  int available;
  /** Buffer index of the first character of the current token, or -1 while a token is being started. */
  int tokenBegin;

  /** Position in buffer. */
  public int bufpos = -1;
  /** Line number recorded for each buffered character. */
  protected int bufline[];
  /** Column number recorded for each buffered character. */
  protected int bufcolumn[];

  protected int column = 0;
  protected int line = 1;

  protected boolean prevCharIsCR = false;
  protected boolean prevCharIsLF = false;

  protected java.io.Reader inputStream;

  protected char[] buffer;
  /** Index one past the last character that was read from the reader into the buffer. */
  protected int maxNextCharInd = 0;
  /** Number of characters that were backed up and must be re-delivered before reading on. */
  protected int inBuf = 0;
  protected int tabSize = 8;

  /** Sets the tab width used when computing column numbers. */
  protected void setTabSize(int i) { tabSize = i; }

  /**
   * Returns the tab width. The argument is ignored; the signature is kept
   * because it is what JavaCC generated and subclasses may call it.
   */
  protected int getTabSize(int i) { return tabSize; }

  /** Returns the tab width used when computing column numbers. */
  protected int getTabSize() { return tabSize; }

  /**
   * Grows the three parallel buffers by 2048 slots and moves the current
   * token to the start of the new buffers.
   *
   * @param wrapAround true when the token wraps around the end of the old
   *        buffer (tail followed by head must be copied), false when it is
   *        one contiguous run starting at {@code tokenBegin}
   * @throws Error if the copy fails; the original failure is attached as cause
   */
  protected void ExpandBuff(boolean wrapAround)
  {
    char[] newbuffer = new char[bufsize + 2048];
    int newbufline[] = new int[bufsize + 2048];
    int newbufcolumn[] = new int[bufsize + 2048];

    try
    {
      if (wrapAround)
      {
        System.arraycopy(buffer, tokenBegin, newbuffer, 0, bufsize - tokenBegin);
        System.arraycopy(buffer, 0, newbuffer, bufsize - tokenBegin, bufpos);
        buffer = newbuffer;

        System.arraycopy(bufline, tokenBegin, newbufline, 0, bufsize - tokenBegin);
        System.arraycopy(bufline, 0, newbufline, bufsize - tokenBegin, bufpos);
        bufline = newbufline;

        System.arraycopy(bufcolumn, tokenBegin, newbufcolumn, 0, bufsize - tokenBegin);
        System.arraycopy(bufcolumn, 0, newbufcolumn, bufsize - tokenBegin, bufpos);
        bufcolumn = newbufcolumn;

        maxNextCharInd = (bufpos += (bufsize - tokenBegin));
      }
      else
      {
        System.arraycopy(buffer, tokenBegin, newbuffer, 0, bufsize - tokenBegin);
        buffer = newbuffer;

        System.arraycopy(bufline, tokenBegin, newbufline, 0, bufsize - tokenBegin);
        bufline = newbufline;

        System.arraycopy(bufcolumn, tokenBegin, newbufcolumn, 0, bufsize - tokenBegin);
        bufcolumn = newbufcolumn;

        maxNextCharInd = (bufpos -= tokenBegin);
      }
    }
    catch (Throwable t)
    {
      // Keep the same Error type and message callers already see, but do not
      // lose the stack trace of the real failure.
      throw new Error(t.getMessage(), t);
    }

    bufsize += 2048;
    available = bufsize;
    tokenBegin = 0;
  }

  /**
   * Makes room in the buffer if necessary and reads the next chunk of
   * characters from the reader.
   *
   * @throws java.io.IOException when the reader fails or reports end of
   *         input; on end of input the reader is closed first. Before
   *         rethrowing, {@code bufpos} is stepped back by one.
   */
  protected void FillBuff() throws java.io.IOException
  {
    if (maxNextCharInd == available)
    {
      if (available == bufsize)
      {
        if (tokenBegin > 2048)
        {
          // Enough free space before the token: restart at index 0.
          bufpos = maxNextCharInd = 0;
          available = tokenBegin;
        }
        else if (tokenBegin < 0)
          bufpos = maxNextCharInd = 0;
        else
          ExpandBuff(false);
      }
      else if (available > tokenBegin)
        available = bufsize;
      else if ((tokenBegin - available) < 2048)
        ExpandBuff(true);
      else
        available = tokenBegin;
    }

    int i;
    try {
      if ((i = inputStream.read(buffer, maxNextCharInd, available - maxNextCharInd)) == -1)
      {
        inputStream.close();
        throw new java.io.IOException();
      }
      else
        maxNextCharInd += i;
      return;
    }
    catch(java.io.IOException e) {
      --bufpos;
      backup(0);
      if (tokenBegin == -1)
        tokenBegin = bufpos;
      throw e;
    }
  }

  /**
   * Starts a new token: reads one character and marks its buffer position
   * as the beginning of the token.
   *
   * @return the first character of the new token
   * @throws java.io.IOException if no character can be read
   */
  public char BeginToken() throws java.io.IOException
  {
    tokenBegin = -1;
    char c = readChar();
    tokenBegin = bufpos;

    return c;
  }

  /**
   * Advances the line/column counters for {@code c} and records them at
   * {@code bufpos}. A line break takes effect on the character that follows
   * it; "\r\n" counts as a single break; a tab advances the column to the
   * next multiple of {@code tabSize}.
   */
  protected void UpdateLineColumn(char c)
  {
    column++;

    if (prevCharIsLF)
    {
      prevCharIsLF = false;
      line += (column = 1);
    }
    else if (prevCharIsCR)
    {
      prevCharIsCR = false;
      if (c == '\n')
      {
        prevCharIsLF = true;
      }
      else
        line += (column = 1);
    }

    switch (c)
    {
      case '\r' :
        prevCharIsCR = true;
        break;
      case '\n' :
        prevCharIsLF = true;
        break;
      case '\t' :
        column--;
        column += (tabSize - (column % tabSize));
        break;
      default :
        break;
    }

    bufline[bufpos] = line;
    bufcolumn[bufpos] = column;
  }

  /**
   * Read a character. Backed-up characters are re-delivered first (without
   * touching the line/column counters); otherwise the next character is
   * taken from the buffer, refilling it when exhausted.
   *
   * @throws java.io.IOException if the buffer cannot be refilled
   */
  public char readChar() throws java.io.IOException
  {
    if (inBuf > 0)
    {
      --inBuf;

      if (++bufpos == bufsize)
        bufpos = 0;

      return buffer[bufpos];
    }

    if (++bufpos >= maxNextCharInd)
      FillBuff();

    char c = buffer[bufpos];

    UpdateLineColumn(c);
    return c;
  }

  /**
   * @deprecated
   * @see #getEndColumn
   */
  @Deprecated
  public int getColumn() {
    return bufcolumn[bufpos];
  }

  /**
   * @deprecated
   * @see #getEndLine
   */
  @Deprecated
  public int getLine() {
    return bufline[bufpos];
  }

  /** Get token end column number. */
  public int getEndColumn() {
    return bufcolumn[bufpos];
  }

  /** Get token end line number. */
  public int getEndLine() {
    return bufline[bufpos];
  }

  /** Get token beginning column number. */
  public int getBeginColumn() {
    return bufcolumn[tokenBegin];
  }

  /** Get token beginning line number. */
  public int getBeginLine() {
    return bufline[tokenBegin];
  }

  /** Backup a number of characters. */
  public void backup(int amount) {

    inBuf += amount;
    if ((bufpos -= amount) < 0)
      bufpos += bufsize;
  }

  /**
   * Constructor.
   *
   * @param dstream source of characters
   * @param startline line number assigned to the first character
   * @param startcolumn column number assigned to the first character
   * @param buffersize initial capacity of the internal buffers
   */
  public SimpleCharStream(java.io.Reader dstream, int startline,
  int startcolumn, int buffersize)
  {
    inputStream = dstream;
    line = startline;
    column = startcolumn - 1;

    available = bufsize = buffersize;
    buffer = new char[buffersize];
    bufline = new int[buffersize];
    bufcolumn = new int[buffersize];
  }

  /** Constructor. Uses an initial buffer of 4096 characters. */
  public SimpleCharStream(java.io.Reader dstream, int startline,
                          int startcolumn)
  {
    this(dstream, startline, startcolumn, 4096);
  }

  /** Constructor. Starts at line 1, column 1 with a buffer of 4096 characters. */
  public SimpleCharStream(java.io.Reader dstream)
  {
    this(dstream, 1, 1, 4096);
  }

  /**
   * Reinitialise. The buffers are reallocated only when they were released
   * or when {@code buffersize} differs from the current buffer length.
   */
  public void ReInit(java.io.Reader dstream, int startline,
  int startcolumn, int buffersize)
  {
    inputStream = dstream;
    line = startline;
    column = startcolumn - 1;

    if (buffer == null || buffersize != buffer.length)
    {
      available = bufsize = buffersize;
      buffer = new char[buffersize];
      bufline = new int[buffersize];
      bufcolumn = new int[buffersize];
    }
    prevCharIsLF = prevCharIsCR = false;
    tokenBegin = inBuf = maxNextCharInd = 0;
    bufpos = -1;
  }

  /** Reinitialise. */
  public void ReInit(java.io.Reader dstream, int startline,
                     int startcolumn)
  {
    ReInit(dstream, startline, startcolumn, 4096);
  }

  /** Reinitialise. */
  public void ReInit(java.io.Reader dstream)
  {
    ReInit(dstream, 1, 1, 4096);
  }

  /**
   * Constructor. When {@code encoding} is null the bytes are decoded with
   * the platform default charset.
   */
  public SimpleCharStream(java.io.InputStream dstream, String encoding, int startline,
  int startcolumn, int buffersize) throws java.io.UnsupportedEncodingException
  {
    this(encoding == null ? new java.io.InputStreamReader(dstream) : new java.io.InputStreamReader(dstream, encoding), startline, startcolumn, buffersize);
  }

  /** Constructor. Decodes the bytes with the platform default charset. */
  public SimpleCharStream(java.io.InputStream dstream, int startline,
  int startcolumn, int buffersize)
  {
    this(new java.io.InputStreamReader(dstream), startline, startcolumn, buffersize);
  }

  /** Constructor. */
  public SimpleCharStream(java.io.InputStream dstream, String encoding, int startline,
                          int startcolumn) throws java.io.UnsupportedEncodingException
  {
    this(dstream, encoding, startline, startcolumn, 4096);
  }

  /** Constructor. */
  public SimpleCharStream(java.io.InputStream dstream, int startline,
                          int startcolumn)
  {
    this(dstream, startline, startcolumn, 4096);
  }

  /** Constructor. */
  public SimpleCharStream(java.io.InputStream dstream, String encoding) throws java.io.UnsupportedEncodingException
  {
    this(dstream, encoding, 1, 1, 4096);
  }

  /** Constructor. */
  public SimpleCharStream(java.io.InputStream dstream)
  {
    this(dstream, 1, 1, 4096);
  }

  /** Reinitialise. */
  public void ReInit(java.io.InputStream dstream, String encoding, int startline,
                          int startcolumn, int buffersize) throws java.io.UnsupportedEncodingException
  {
    ReInit(encoding == null ? new java.io.InputStreamReader(dstream) : new java.io.InputStreamReader(dstream, encoding), startline, startcolumn, buffersize);
  }

  /** Reinitialise. */
  public void ReInit(java.io.InputStream dstream, int startline,
                          int startcolumn, int buffersize)
  {
    ReInit(new java.io.InputStreamReader(dstream), startline, startcolumn, buffersize);
  }

  /** Reinitialise. */
  public void ReInit(java.io.InputStream dstream, String encoding) throws java.io.UnsupportedEncodingException
  {
    ReInit(dstream, encoding, 1, 1, 4096);
  }

  /** Reinitialise. */
  public void ReInit(java.io.InputStream dstream)
  {
    ReInit(dstream, 1, 1, 4096);
  }

  /** Reinitialise. */
  public void ReInit(java.io.InputStream dstream, String encoding, int startline,
                     int startcolumn) throws java.io.UnsupportedEncodingException
  {
    ReInit(dstream, encoding, startline, startcolumn, 4096);
  }

  /** Reinitialise. */
  public void ReInit(java.io.InputStream dstream, int startline,
                     int startcolumn)
  {
    ReInit(dstream, startline, startcolumn, 4096);
  }

  /**
   * Get token literal value: the characters from {@code tokenBegin} up to
   * and including {@code bufpos}, joining tail and head when the token
   * wraps around the end of the buffer.
   */
  public String GetImage()
  {
    if (bufpos >= tokenBegin)
      return new String(buffer, tokenBegin, bufpos - tokenBegin + 1);
    else
      return new String(buffer, tokenBegin, bufsize - tokenBegin) +
                            new String(buffer, 0, bufpos + 1);
  }

  /**
   * Get the suffix.
   *
   * @param len number of characters wanted
   * @return the last {@code len} characters ending at {@code bufpos}
   */
  public char[] GetSuffix(int len)
  {
    char[] ret = new char[len];

    if ((bufpos + 1) >= len)
      System.arraycopy(buffer, bufpos - len + 1, ret, 0, len);
    else
    {
      // The suffix wraps: take the missing part from the end of the buffer.
      System.arraycopy(buffer, bufsize - (len - bufpos - 1), ret, 0,
                                                        len - bufpos - 1);
      System.arraycopy(buffer, 0, ret, len - bufpos - 1, bufpos + 1);
    }

    return ret;
  }

  /** Reset buffer when finished. */
  public void Done()
  {
    buffer = null;
    bufline = null;
    bufcolumn = null;
  }

  /**
   * Method to adjust line and column numbers for the start of a token.
   * Rewrites the recorded positions of the current token (plus any
   * backed-up characters) so that it begins at the given line and column.
   */
  public void adjustBeginLineColumn(int newLine, int newCol)
  {
    int start = tokenBegin;
    int len;

    if (bufpos >= tokenBegin)
    {
      len = bufpos - tokenBegin + inBuf + 1;
    }
    else
    {
      len = bufsize - tokenBegin + bufpos + 1 + inBuf;
    }

    int i = 0, j = 0, k = 0;
    int nextColDiff = 0, columnDiff = 0;

    // First pass: characters that share the line of the token start.
    while (i < len && bufline[j = start % bufsize] == bufline[k = ++start % bufsize])
    {
      bufline[j] = newLine;
      nextColDiff = columnDiff + bufcolumn[k] - bufcolumn[j];
      bufcolumn[j] = newCol + columnDiff;
      columnDiff = nextColDiff;
      i++;
    }

    if (i < len)
    {
      bufline[j] = newLine++;
      bufcolumn[j] = newCol + columnDiff;

      // Remaining characters: only line numbers are renumbered.
      while (i++ < len)
      {
        if (bufline[j = start % bufsize] != bufline[++start % bufsize])
          bufline[j] = newLine++;
        else
          bufline[j] = newLine;
      }
    }

    line = bufline[j];
    column = bufcolumn[j];
  }

}
SimpleNode.java Version 4.3 */ +/* JavaCCOptions:MULTI=true,NODE_USES_PARSER=false,VISITOR=true,TRACK_TOKENS=false,NODE_PREFIX=AST,NODE_EXTENDS=,NODE_FACTORY=,SUPPORT_CLASS_VISIBILITY_PUBLIC=true */ +package eu.dnetlib.data.collective.transformation.rulelanguage.parser; + +public +class SimpleNode implements Node { + + protected Node parent; + protected Node[] children; + protected int id; + protected Object value; + protected FtScript parser; + + public SimpleNode(int i) { + id = i; + } + + public SimpleNode(FtScript p, int i) { + this(i); + parser = p; + } + + public void jjtOpen() { + } + + public void jjtClose() { + } + + public void jjtSetParent(Node n) { parent = n; } + public Node jjtGetParent() { return parent; } + + public void jjtAddChild(Node n, int i) { + if (children == null) { + children = new Node[i + 1]; + } else if (i >= children.length) { + Node c[] = new Node[i + 1]; + System.arraycopy(children, 0, c, 0, children.length); + children = c; + } + children[i] = n; + } + + public Node jjtGetChild(int i) { + return children[i]; + } + + public int jjtGetNumChildren() { + return (children == null) ? 0 : children.length; + } + + public void jjtSetValue(Object value) { this.value = value; } + public Object jjtGetValue() { return value; } + + /** Accept the visitor. **/ + public Object jjtAccept(FtScriptVisitor visitor, Object data) +{ + return visitor.visit(this, data); + } + + /** Accept the visitor. **/ + public Object childrenAccept(FtScriptVisitor visitor, Object data) +{ + if (children != null) { + for (int i = 0; i < children.length; ++i) { + children[i].jjtAccept(visitor, data); + } + } + return data; + } + + /* You can override these two methods in subclasses of SimpleNode to + customize the way the node appears when the tree is dumped. If + your output uses more than one line you should override + toString(String), otherwise overriding toString() is probably all + you need to do. 
*/ + + public String toString() { return FtScriptTreeConstants.jjtNodeName[id]; } + public String toString(String prefix) { return prefix + toString(); } + + /* Override this method if you want to customize how the node dumps + out its children. */ + + public void dump(String prefix) { + System.out.println(toString(prefix)); + if (children != null) { + for (int i = 0; i < children.length; ++i) { + SimpleNode n = (SimpleNode)children[i]; + if (n != null) { + n.dump(prefix + " "); + } + } + } + } +} + +/* JavaCC - OriginalChecksum=67cc365cba43ea3c43c2635579e8f356 (do not edit this line) */ diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/Token.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/Token.java new file mode 100644 index 0000000..1607096 --- /dev/null +++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/Token.java @@ -0,0 +1,131 @@ +/* Generated By:JavaCC: Do not edit this line. Token.java Version 5.0 */ +/* JavaCCOptions:TOKEN_EXTENDS=,KEEP_LINE_COL=null,SUPPORT_CLASS_VISIBILITY_PUBLIC=true */ +package eu.dnetlib.data.collective.transformation.rulelanguage.parser; + +/** + * Describes the input token stream. + */ + +public class Token implements java.io.Serializable { + + /** + * The version identifier for this Serializable class. + * Increment only if the serialized form of the + * class changes. + */ + private static final long serialVersionUID = 1L; + + /** + * An integer that describes the kind of this token. This numbering + * system is determined by JavaCCParser, and a table of these numbers is + * stored in the file ...Constants.java. + */ + public int kind; + + /** The line number of the first character of this Token. */ + public int beginLine; + /** The column number of the first character of this Token. */ + public int beginColumn; + /** The line number of the last character of this Token. 
*/ + public int endLine; + /** The column number of the last character of this Token. */ + public int endColumn; + + /** + * The string image of the token. + */ + public String image; + + /** + * A reference to the next regular (non-special) token from the input + * stream. If this is the last token from the input stream, or if the + * token manager has not read tokens beyond this one, this field is + * set to null. This is true only if this token is also a regular + * token. Otherwise, see below for a description of the contents of + * this field. + */ + public Token next; + + /** + * This field is used to access special tokens that occur prior to this + * token, but after the immediately preceding regular (non-special) token. + * If there are no such special tokens, this field is set to null. + * When there are more than one such special token, this field refers + * to the last of these special tokens, which in turn refers to the next + * previous special token through its specialToken field, and so on + * until the first special token (whose specialToken field is null). + * The next fields of special tokens refer to other special tokens that + * immediately follow it (without an intervening regular token). If there + * is no such token, this field is null. + */ + public Token specialToken; + + /** + * An optional attribute value of the Token. + * Tokens which are not used as syntactic sugar will often contain + * meaningful values that will be used later on by the compiler or + * interpreter. This attribute value is often different from the image. + * Any subclass of Token that actually wants to return a non-null value can + * override this method as appropriate. + */ + public Object getValue() { + return null; + } + + /** + * No-argument constructor + */ + public Token() {} + + /** + * Constructs a new token for the specified Image. + */ + public Token(int kind) + { + this(kind, null); + } + + /** + * Constructs a new token for the specified Image and Kind. 
/**
 * Token Manager Error: thrown by the generated token manager when the
 * input cannot be tokenized or the token manager is misused.
 */
public class TokenMgrError extends Error
{

  /**
   * The version identifier for this Serializable class.
   * Increment only if the <i>serialized</i> form of the
   * class changes.
   */
  private static final long serialVersionUID = 1L;

  /*
   * Ordinals for various reasons why an Error of this type can be thrown.
   */

  /**
   * Lexical error occurred.
   */
  static final int LEXICAL_ERROR = 0;

  /**
   * An attempt was made to create a second instance of a static token manager.
   */
  static final int STATIC_LEXER_ERROR = 1;

  /**
   * Tried to change to an invalid lexical state.
   */
  static final int INVALID_LEXICAL_STATE = 2;

  /**
   * Detected (and bailed out of) an infinite loop in the token manager.
   */
  static final int LOOP_DETECTED = 3;

  /**
   * Indicates the reason why the exception is thrown. It will have
   * one of the above 4 values.
   */
  int errorCode;

  /**
   * Replaces unprintable characters by their escaped (or unicode escaped)
   * equivalents in the given string.
   *
   * <p>NUL characters are dropped; backspace, tab, newline, form feed,
   * carriage return, both quote characters and the backslash get their
   * two-character escape; any other character outside 0x20..0x7e is
   * written as a four-digit {@code \\uXXXX} escape.
   *
   * @param str the raw text
   * @return the escaped text
   */
  protected static final String addEscapes(String str) {
    StringBuilder retval = new StringBuilder();
    for (int i = 0; i < str.length(); i++) {
      char ch = str.charAt(i);
      switch (ch)
      {
        case 0 :
          continue;
        case '\b':
          retval.append("\\b");
          continue;
        case '\t':
          retval.append("\\t");
          continue;
        case '\n':
          retval.append("\\n");
          continue;
        case '\f':
          retval.append("\\f");
          continue;
        case '\r':
          retval.append("\\r");
          continue;
        case '\"':
          retval.append("\\\"");
          continue;
        case '\'':
          retval.append("\\\'");
          continue;
        case '\\':
          retval.append("\\\\");
          continue;
        default:
          if (ch < 0x20 || ch > 0x7e) {
            // Left-pad the hex code to four digits.
            String s = "0000" + Integer.toString(ch, 16);
            retval.append("\\u" + s.substring(s.length() - 4, s.length()));
          } else {
            retval.append(ch);
          }
          continue;
      }
    }
    return retval.toString();
  }

  /**
   * Returns a detailed message for the Error when it is thrown by the
   * token manager to indicate a lexical error.
   *
   * <p>Note: You can customize the lexical error message by modifying this method.
   *
   * @param EOFSeen     indicates if EOF caused the lexical error
   * @param lexState    lexical state in which this error occurred
   *                    (not part of the message)
   * @param errorLine   line number when the error occurred
   * @param errorColumn column number when the error occurred
   * @param errorAfter  prefix that was seen before this error occurred
   * @param curChar     the offending character
   * @return the message text
   */
  protected static String LexicalError(boolean EOFSeen, int lexState, int errorLine, int errorColumn, String errorAfter, char curChar) {
    // At end of input there is no offending character to show, so the
    // explicit <EOF> marker is printed instead of the quoted character.
    return("Lexical error at line " +
          errorLine + ", column " +
          errorColumn + ". Encountered: " +
          (EOFSeen ? "<EOF> " : ("\"" + addEscapes(String.valueOf(curChar)) + "\"") + " (" + (int)curChar + "), ") +
          "after : \"" + addEscapes(errorAfter) + "\"");
  }

  /**
   * You can also modify the body of this method to customize your error messages.
   * For example, cases like LOOP_DETECTED and INVALID_LEXICAL_STATE are not
   * of end-users concern, so you can return something like :
   *
   *     "Internal Error : Please file a bug report .... "
   *
   * from this method for such cases in the release version of your parser.
   */
  public String getMessage() {
    return super.getMessage();
  }

  /*
   * Constructors of various flavors follow.
   */

  /** No arg constructor. */
  public TokenMgrError() {
  }

  /**
   * Constructor with message and reason.
   *
   * @param message the detail message
   * @param reason  one of the error ordinals declared in this class
   */
  public TokenMgrError(String message, int reason) {
    super(message);
    errorCode = reason;
  }

  /** Full Constructor: builds the message with {@link #LexicalError}. */
  public TokenMgrError(boolean EOFSeen, int lexState, int errorLine, int errorColumn, String errorAfter, char curChar, int reason) {
    this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason);
  }
}
+TOKEN : { < COPY: "copy" > } +TOKEN : { < DECLARE_NAMESPACE: "declare_ns" > } +TOKEN : { < DECLARE_SCRIPT: "declare_script" > } +TOKEN : { < DECLARE_SUBSCRIPT: "declare_subscript" > } +TOKEN : { < PREPROCESS: "preprocess" > } +TOKEN : { < SET: "set" > } +TOKEN : { < SKIPRECORD: "skipRecord" > } +TOKEN : { < SPLIT: "split" > } +TOKEN : { < STATIC: "static" > } +TOKEN : { < XPATH_SCHEME: "xpath:" > } +TOKEN : { < APPLY: "apply" > } +TOKEN : { < EMPTY: "empty" > } +TOKEN : { < DBLOOKUP: "dblookup" > } +TOKEN : { < BLACKLIST: "blacklist" > } +TOKEN : { < LOOKUP: "lookup" > } +TOKEN : { < CONCAT: "concat" > } + +// SPEC. CHARS +TOKEN : { < SEMICOLON: ";" > } +TOKEN : { < RBRACKET: "]" > } +TOKEN : { < LBRACE: "{" > } +TOKEN : { < RBRACE: "}" > } +TOKEN : { < COMMA: "," > } +TOKEN : { < RPAREN: ")" > } +TOKEN : { < QUOTE: "'" > } + +TOKEN : +{ + + < EQUAL: "=" > | + < NOTEQUAL: "!=" > | + < GT: " > " | ">" > | + < LT: " < " | "<" > | + < GTE: " >= " | ">=" > | + < LTE: " <= " | "<=" > | + < PLUS: "+" > | + < VBAR: "|" > | +// < SLASH_SLASH: "//" > | + < SLASH: "/" > | + + < DOT_DOT: ".." > | + //< DOT: "." > | + + < AT: "@" > | + + < LPAREN: "(" > | + < LBRACKET: "[" > | + < DOLLAR_QNAME: "$" > | + < PERCENT_QNAME: "%" < IDENTIFIER > > + +} + +TOKEN : +{ + < XPATH: + < XPATH_SCHEME > + > +} + +TOKEN : +{ + < JOBCONST: + < JOBCONST_PREFIX > ()* > +| < #JOBCONST_PREFIX: "$job." > } + +TOKEN: { + < QUOTED_STRING: "\"" (~["\""] )+ "\"" > } + +TOKEN: +{ + < SINGLE_QUOTE: "'" + (~["'"] + )+ "'" > } + +TOKEN : /* STRING LITERALS*/ +{ + < CHARACTER_LITERAL: + "'" + ( (~["'","\\","\n","\r"]) + | ("\\" + ( ["n","t","b","r","f","\\","'","\""] + | ["0"-"7"] ( ["0"-"7"] )? + | ["0"-"3"] ["0"-"7"] ["0"-"7"] + ) + ) + ) + "'" + > +| + < STRING_LITERAL: + "\"" + ( (~["\"", "\\", "\n", "\r"]) + | ("\\" + ( ["n", "t", "b", "r", "f", "\\", "'", "\""] + | ["0"-"7"] ( ["0"-"7"] )? 
+ | ["0"-"3"] ["0"-"7"] ["0"-"7"] + ) + ) + )* + "\"" + > +} + + +TOKEN : { + < URI: + < IDENTIFIER > "://" ( "/" < IDENTIFIER > )* ( "/" )? + > } + + + +TOKEN : /* IDENTIFIERS */ +{ + < IDENTIFIER: + ( < AT >|) (||< DOT >|< COLON >|< AT >)* > +| < #LETTER_OR_DIGIT: ["_","a"-"z","A"-"Z","0"-"9"] > +| < #MINUS: "-" > +| < #DOT: "." > +| < #COLON: ":" > +//| < #AT: "@" > +} + + + +ASTStart Start() :{/*@bgen(jjtree) Start */ + ASTStart jjtn000 = new ASTStart(JJTSTART); + boolean jjtc000 = true; + jjtree.openNodeScope(jjtn000); +/*@egen*/} +{/*@bgen(jjtree) Start */ + try { +/*@egen*/ + (script())/*@bgen(jjtree)*/ + { + jjtree.closeNodeScope(jjtn000, true); + jjtc000 = false; + } +/*@egen*/ + { return jjtn000; }/*@bgen(jjtree)*/ + } catch (Throwable jjte000) { + if (jjtc000) { + jjtree.clearNodeScope(jjtn000); + jjtc000 = false; + } else { + jjtree.popNode(); + } + if (jjte000 instanceof RuntimeException) { + throw (RuntimeException)jjte000; + } + if (jjte000 instanceof ParseException) { + throw (ParseException)jjte000; + } + throw (Error)jjte000; + } finally { + if (jjtc000) { + jjtree.closeNodeScope(jjtn000, true); + } + } +/*@egen*/ +} + +/** + * id = value + */ +void assign(Rules r) : {/*@bgen(jjtree) MyAssign */ + ASTMyAssign jjtn000 = new ASTMyAssign(JJTMYASSIGN); + boolean jjtc000 = true; + jjtree.openNodeScope(jjtn000); +/*@egen*/String value = ""; Token t;} +{/*@bgen(jjtree) MyAssign */ + try { +/*@egen*/ + {jjtn000.setRule(r);} + (t = inputField()/*@bgen(jjtree)*/ + { + jjtree.closeNodeScope(jjtn000, true); + jjtc000 = false; + } +/*@egen*/ {jjtn000.setFieldExpression(t.image);} + |value = quotedString()/*@bgen(jjtree)*/ + { + jjtree.closeNodeScope(jjtn000, true); + jjtc000 = false; + } +/*@egen*/ {jjtn000.setAttribute(value);} + )/*@bgen(jjtree)*/ + } catch (Throwable jjte000) { + if (jjtc000) { + jjtree.clearNodeScope(jjtn000); + jjtc000 = false; + } else { + jjtree.popNode(); + } + if (jjte000 instanceof RuntimeException) { + throw 
(RuntimeException)jjte000; + } + if (jjte000 instanceof ParseException) { + throw (ParseException)jjte000; + } + throw (Error)jjte000; + } finally { + if (jjtc000) { + jjtree.closeNodeScope(jjtn000, true); + } + } +/*@egen*/ +} + +void attrib_list() : {} +{ + + (attribute())* + +} + + +void attribute() : {/*@bgen(jjtree) MyAttribute */ + ASTMyAttribute jjtn000 = new ASTMyAttribute(JJTMYATTRIBUTE); + boolean jjtc000 = true; + jjtree.openNodeScope(jjtn000); +/*@egen*/String value; Token t=null;} +{/*@bgen(jjtree) MyAttribute */ + try { +/*@egen*/ + (value = identifier() { jjtn000.setAttributeValue(value);} + |t = inputField() + { value = t.image; + jjtn000.setAttributeInputField(value); + } + ) + []/*@bgen(jjtree)*/ + } catch (Throwable jjte000) { + if (jjtc000) { + jjtree.clearNodeScope(jjtn000); + jjtc000 = false; + } else { + jjtree.popNode(); + } + if (jjte000 instanceof RuntimeException) { + throw (RuntimeException)jjte000; + } + if (jjte000 instanceof ParseException) { + throw (ParseException)jjte000; + } + throw (Error)jjte000; + } finally { + if (jjtc000) { + jjtree.closeNodeScope(jjtn000, true); + } + } +/*@egen*/ + +} + +/** + * a conditional Rule, which contains the condition, the Rule on which the condition holds and the alternative rule + */ +void conditionalStmt() : {/*@bgen(jjtree) MyCondition */ + ASTMyCondition jjtn000 = new ASTMyCondition(JJTMYCONDITION); + boolean jjtc000 = true; + jjtree.openNodeScope(jjtn000); +/*@egen*/Rules r1 = new Rules(); Rules r2 = new Rules(); Token cond; Token apply;} +{/*@bgen(jjtree) MyCondition */ + try { +/*@egen*/ + [< APPLY > apply = inputField() { + jjtn000.setApplyExpression(apply.image); } ] + < IF > cond = inputField() { jjtn000.setConditionalExpression(cond.image); jjtn000.setPrimaryRule(r1); jjtn000.setSecondaryRule(r2); } + rule(r1) + < ELSE > + rule(r2)/*@bgen(jjtree)*/ + } catch (Throwable jjte000) { + if (jjtc000) { + jjtree.clearNodeScope(jjtn000); + jjtc000 = false; + } else { + jjtree.popNode(); + } + 
if (jjte000 instanceof RuntimeException) { + throw (RuntimeException)jjte000; + } + if (jjte000 instanceof ParseException) { + throw (ParseException)jjte000; + } + throw (Error)jjte000; + } finally { + if (jjtc000) { + jjtree.closeNodeScope(jjtn000, true); + } + } +/*@egen*/ +} + +void empty(Rules r) : {/*@bgen(jjtree) MyEmpty */ + ASTMyEmpty jjtn000 = new ASTMyEmpty(JJTMYEMPTY); + boolean jjtc000 = true; + jjtree.openNodeScope(jjtn000); +/*@egen*/} +{/*@bgen(jjtree) MyEmpty */ + try { +/*@egen*/ + {jjtn000.setRule(r);} + < EMPTY >/*@bgen(jjtree)*/ + { + jjtree.closeNodeScope(jjtn000, true); + jjtc000 = false; + } +/*@egen*/ {jjtn000.setEmpty(true);}/*@bgen(jjtree)*/ + } finally { + if (jjtc000) { + jjtree.closeNodeScope(jjtn000, true); + } + } +/*@egen*/ +} + +String identifier() : {Token t;} +{ + t = {return t.image;} +} + +void importDeclaration() : +{/*@bgen(jjtree) MyImport */ + ASTMyImport jjtn000 = new ASTMyImport(JJTMYIMPORT); + boolean jjtc000 = true; + jjtree.openNodeScope(jjtn000); +/*@egen*/String scriptName;} +{/*@bgen(jjtree) MyImport */ + try { +/*@egen*/ + scriptName = identifier() ";"/*@bgen(jjtree)*/ + { + jjtree.closeNodeScope(jjtn000, true); + jjtc000 = false; + } +/*@egen*/ { jjtn000.setScriptName(scriptName); }/*@bgen(jjtree)*/ + } catch (Throwable jjte000) { + if (jjtc000) { + jjtree.clearNodeScope(jjtn000); + jjtc000 = false; + } else { + jjtree.popNode(); + } + if (jjte000 instanceof RuntimeException) { + throw (RuntimeException)jjte000; + } + if (jjte000 instanceof ParseException) { + throw (ParseException)jjte000; + } + throw (Error)jjte000; + } finally { + if (jjtc000) { + jjtree.closeNodeScope(jjtn000, true); + } + } +/*@egen*/ +} + +Token anyExpression() : { Token t; } +{ + t = < IDENTIFIER > { + return t; } } + +Token inputField() :{ Token t; } +{ + t = < XPATH > + { + return t; + } + | t = < JOBCONST > { + return t; } + | t = < DOLLAR_QNAME > + { + return t; + } +} + + +/* + * becomes obsolete + */ +String key() : {String key;} +{ + 
key = identifier() + + {return key;} +} + +void literal() : +{} +{ + < CHARACTER_LITERAL > | < STRING_LITERAL > +} + +void nsDeclaration() : +{/*@bgen(jjtree) MyNs */ + ASTMyNs jjtn000 = new ASTMyNs(JJTMYNS); + boolean jjtc000 = true; + jjtree.openNodeScope(jjtn000); +/*@egen*/String nsPrefix; String nsUri;} +{/*@bgen(jjtree) MyNs */ + try { +/*@egen*/ + < DECLARE_NAMESPACE > nsPrefix = identifier() < EQUAL > nsUri = quotedString() < SEMICOLON >/*@bgen(jjtree)*/ + { + jjtree.closeNodeScope(jjtn000, true); + jjtc000 = false; + } +/*@egen*/ + { + jjtn000.setNsDeclaration(nsPrefix, nsUri); + }/*@bgen(jjtree)*/ + } catch (Throwable jjte000) { + if (jjtc000) { + jjtree.clearNodeScope(jjtn000); + jjtc000 = false; + } else { + jjtree.popNode(); + } + if (jjte000 instanceof RuntimeException) { + throw (RuntimeException)jjte000; + } + if (jjte000 instanceof ParseException) { + throw (ParseException)jjte000; + } + throw (Error)jjte000; + } finally { + if (jjtc000) { + jjtree.closeNodeScope(jjtn000, true); + } + } +/*@egen*/ +} + +void preprocess() :{/*@bgen(jjtree) MyPreprocess */ + ASTMyPreprocess jjtn000 = new ASTMyPreprocess(JJTMYPREPROCESS); + boolean jjtc000 = true; + jjtree.openNodeScope(jjtn000); +/*@egen*/String preprocessId;} {/*@bgen(jjtree) MyPreprocess */ + try { +/*@egen*/ + ( < PREPROCESS > + preprocessId = identifier() < EQUAL > + < DBLOOKUP > + < LPAREN > { String sqlExpr; } + sqlExpr = quotedString() + < RPAREN > { jjtn000.preprocess(preprocessId, "dblookup", sqlExpr); } + < SEMICOLON > + ) | + ( + < BLACKLIST > + < LPAREN > { String blacklistDataSourceId; } + blacklistDataSourceId = quotedString() + < RPAREN > { jjtn000.preprocess("blacklist", blacklistDataSourceId); } + < SEMICOLON > )/*@bgen(jjtree)*/ + } catch (Throwable jjte000) { + if (jjtc000) { + jjtree.clearNodeScope(jjtn000); + jjtc000 = false; + } else { + jjtree.popNode(); + } + if (jjte000 instanceof RuntimeException) { + throw (RuntimeException)jjte000; + } + if (jjte000 instanceof 
ParseException) { + throw (ParseException)jjte000; + } + throw (Error)jjte000; + } finally { + if (jjtc000) { + jjtree.closeNodeScope(jjtn000, true); + } + } +/*@egen*/ } + +void set(Rules r) : {/*@bgen(jjtree) MySet */ + ASTMySet jjtn000 = new ASTMySet(JJTMYSET); + boolean jjtc000 = true; + jjtree.openNodeScope(jjtn000); +/*@egen*/Token expr; String value = "";} +{/*@bgen(jjtree) MySet */ + try { +/*@egen*/ + { jjtn000.setRule(r); } + + < SET > + < LPAREN > + ( + expr = inputField() { jjtn000.setValueExpression(expr.image); } + | value = quotedString() { jjtn000.setAttribute(value); } + ) + ( + < COMMA > + { + r = new Rules(); + } + rule(r) + { + jjtn000.addAttributeRule(r); + } + + )* + < RPAREN >/*@bgen(jjtree)*/ + } catch (Throwable jjte000) { + if (jjtc000) { + jjtree.clearNodeScope(jjtn000); + jjtc000 = false; + } else { + jjtree.popNode(); + } + if (jjte000 instanceof RuntimeException) { + throw (RuntimeException)jjte000; + } + if (jjte000 instanceof ParseException) { + throw (ParseException)jjte000; + } + throw (Error)jjte000; + } finally { + if (jjtc000) { + jjtree.closeNodeScope(jjtn000, true); + } + } +/*@egen*/ +} + +void skip(Rules r) :{/*@bgen(jjtree) MySkip */ + ASTMySkip jjtn000 = new ASTMySkip(JJTMYSKIP); + boolean jjtc000 = true; + jjtree.openNodeScope(jjtn000); +/*@egen*/} +{/*@bgen(jjtree) MySkip */ + try { +/*@egen*/ + { jjtn000.setRule(r); } + < SKIPRECORD > {jjtn000.skipRecord();} + < LPAREN > + < RPAREN >/*@bgen(jjtree)*/ + } finally { + if (jjtc000) { + jjtree.closeNodeScope(jjtn000, true); + } + } +/*@egen*/ } + +void copy(Rules r) : {/*@bgen(jjtree) MyCopy */ + ASTMyCopy jjtn000 = new ASTMyCopy(JJTMYCOPY); + boolean jjtc000 = true; + jjtree.openNodeScope(jjtn000); +/*@egen*/} +{/*@bgen(jjtree) MyCopy */ + try { +/*@egen*/ { jjtn000.setRule(r); } + < COPY > + < LPAREN > { String templateMatchExpression; String applyTemplateSelectExpression; String copySelectExpression; } + templateMatchExpression = quotedString() + < COMMA > + 
applyTemplateSelectExpression = quotedString() + < COMMA > + copySelectExpression = quotedString() + < RPAREN >/*@bgen(jjtree)*/ + { + jjtree.closeNodeScope(jjtn000, true); + jjtc000 = false; + } +/*@egen*/ { jjtn000.copy(templateMatchExpression, applyTemplateSelectExpression, copySelectExpression); }/*@bgen(jjtree)*/ + } catch (Throwable jjte000) { + if (jjtc000) { + jjtree.clearNodeScope(jjtn000); + jjtc000 = false; + } else { + jjtree.popNode(); + } + if (jjte000 instanceof RuntimeException) { + throw (RuntimeException)jjte000; + } + if (jjte000 instanceof ParseException) { + throw (ParseException)jjte000; + } + throw (Error)jjte000; + } finally { + if (jjtc000) { + jjtree.closeNodeScope(jjtn000, true); + } + } +/*@egen*/ + +} + +void op(Rules r) : {/*@bgen(jjtree) MyOp */ + ASTMyOp jjtn000 = new ASTMyOp(JJTMYOP); + boolean jjtc000 = true; + jjtree.openNodeScope(jjtn000); +/*@egen*/String functionName = ""; Token expr; Token expr2; Token vocab; String defaultPattern = ""; String function = ""; String elementName = ""; String regExpr; Token feature;} +{/*@bgen(jjtree) MyOp */ + try { +/*@egen*/ + {jjtn000.setRule(r);} + ( ( + + functionName = identifier() attrib_list() + /*@bgen(jjtree)*/ + { + jjtree.closeNodeScope(jjtn000, true); + jjtc000 = false; + } +/*@egen*/ {jjtn000.getValue(functionName);} + ) + | ( + + expr = inputField() + + vocab = + ( + + defaultPattern = quotedString() + + function = quotedString() )* + /*@bgen(jjtree)*/ + { + jjtree.closeNodeScope(jjtn000, true); + jjtc000 = false; + } +/*@egen*/ {jjtn000.convert(expr.image, vocab.image, defaultPattern, function);} + ) + | (< EXTRACT > + < LPAREN > + feature = < IDENTIFIER > + < RPAREN >/*@bgen(jjtree)*/ + { + jjtree.closeNodeScope(jjtn000, true); + jjtc000 = false; + } +/*@egen*/ {jjtn000.extract(feature.image); } ) + | ( + + expr = inputField() + + expr2 = inputField() + + regExpr = quotedString() + /*@bgen(jjtree)*/ + { + jjtree.closeNodeScope(jjtn000, true); + jjtc000 = false; + } +/*@egen*/ 
{jjtn000.regExpr(expr.image, expr2.image, regExpr);} + ) + | ( + < LPAREN > + expr = inputField() + < COMMA > + elementName = quotedString() + < COMMA > + regExpr = quotedString() + < RPAREN >/*@bgen(jjtree)*/ + { + jjtree.closeNodeScope(jjtn000, true); + jjtc000 = false; + } +/*@egen*/ {jjtn000.split(expr.image, elementName, regExpr);} + ) + | ( < LOOKUP > { String propertyKey; } + < LPAREN > + expr = inputField() + < COMMA > + propertyKey = quotedString() + < RPAREN >/*@bgen(jjtree)*/ + { + jjtree.closeNodeScope(jjtn000, true); + jjtc000 = false; + } +/*@egen*/ { jjtn000.lookup(expr.image, propertyKey); } + ) + | ( < IDENTIFIEREXTRACT > { String xpathExprJsonString; Token xpathExprInputSource; } + < LPAREN > + // "{//abc, //def }" ?json list of xpath-expr // xpath-expr of input source + // regExpr xpathExprJsonString = singleQuotedString() + < COMMA > + xpathExprInputSource = inputField() + < COMMA > + regExpr = singleQuotedString() + < RPAREN >/*@bgen(jjtree)*/ + { + jjtree.closeNodeScope(jjtn000, true); + jjtc000 = false; + } +/*@egen*/ { jjtn000.identifierExtract(xpathExprJsonString, xpathExprInputSource.image, regExpr); } + ) + | ( < CONCAT > { jjtn000.concat(); } + < LPAREN > { String v; Token t; } + ( v = quotedString() { jjtn000.addConcat(v); } | t = < DOLLAR_QNAME > { jjtn000.addConcat(t.image); } + ) + ( + < COMMA > + ( v = quotedString() { jjtn000.addConcat(v); } | t = < DOLLAR_QNAME > { jjtn000.addConcat(t.image); } + ) )* + < RPAREN > ) + )/*@bgen(jjtree)*/ + } catch (Throwable jjte000) { + if (jjtc000) { + jjtree.clearNodeScope(jjtn000); + jjtc000 = false; + } else { + jjtree.popNode(); + } + if (jjte000 instanceof RuntimeException) { + throw (RuntimeException)jjte000; + } + if (jjte000 instanceof ParseException) { + throw (ParseException)jjte000; + } + throw (Error)jjte000; + } finally { + if (jjtc000) { + jjtree.closeNodeScope(jjtn000, true); + } + } +/*@egen*/ +} + +String leftExprOutputField() :{ String leftExpr; } +{ + leftExpr = identifier() < 
EQUAL > { return leftExpr; } +} + +String leftExprVar() :{ String leftExpr; } { + leftExpr = variable() < EQUAL > { return leftExpr; } } + +String leftExprTemplate() :{ String leftExpr; } +{ + leftExpr = template() < EQUAL > { return leftExpr; } } + +String singleQuotedString() : { Token t;} { + t = + < SINGLE_QUOTE > + { return t.image; } } + +String quotedString() : {Token t;} +{ + t = + < QUOTED_STRING > + {return t.image;} +} + +void rule(Rules r) : { String ruleDecl; String leftExpr;} +{ + [ ruleDecl = ruleDecl() { r.setRuleDeclaration(ruleDecl); } ] + ( leftExpr = leftExprOutputField() + { + String[] fieldArray = leftExpr.split("@"); + r.setTargetField(fieldArray[0]); + if (fieldArray.length > 1) { + r.setAttribute(fieldArray[1]); } + } + | leftExpr = leftExprVar() { r.setVariable(leftExpr); } + | leftExpr = leftExprTemplate() { r.setTemplate(leftExpr); } ) + ( + LOOKAHEAD(2) + + assign(r) + | op(r) + | set(r) + | copy(r) + | empty(r) + | skip(r) + ) < SEMICOLON > +} + +String ruleDecl() : {Token t;} +{ + ( + t = < STATIC > + ) + { return t.image; + } +} + +void script() :{} +{ + scriptDeclaration() + [ importDeclaration() ] + ( + nsDeclaration() + )* + ( + preprocess() )* + ( stmt() | conditionalStmt() )* + ( ) +} + +void scriptDeclaration() : +{/*@bgen(jjtree) MyScript */ + ASTMyScript jjtn000 = new ASTMyScript(JJTMYSCRIPT); + boolean jjtc000 = true; + jjtree.openNodeScope(jjtn000); +/*@egen*/String scriptName; ASTMyScript.SCRIPTTYPE scriptType;} +{/*@bgen(jjtree) MyScript */ + try { +/*@egen*/ + + ( < DECLARE_SCRIPT > { + jjtn000.setScriptType(ASTMyScript.SCRIPTTYPE.MAINSCRIPT); } + | < DECLARE_SUBSCRIPT > { + jjtn000.setScriptType(ASTMyScript.SCRIPTTYPE.SUBSCRIPT); } ) scriptName = quotedString() < SEMICOLON >/*@bgen(jjtree)*/ + { + jjtree.closeNodeScope(jjtn000, true); + jjtc000 = false; + } +/*@egen*/ + { + jjtn000.setScript(scriptName); + }/*@bgen(jjtree)*/ + } catch (Throwable jjte000) { + if (jjtc000) { + jjtree.clearNodeScope(jjtn000); + jjtc000 = 
false; + } else { + jjtree.popNode(); + } + if (jjte000 instanceof RuntimeException) { + throw (RuntimeException)jjte000; + } + if (jjte000 instanceof ParseException) { + throw (ParseException)jjte000; + } + throw (Error)jjte000; + } finally { + if (jjtc000) { + jjtree.closeNodeScope(jjtn000, true); + } + } +/*@egen*/ +} + +void stmt() :{Rules r = new Rules();} +{ + rule(r) +} + +String string() : {Token t;} +{ + t = {return t.image.substring(1, t.image.length()-1);} +} + +String variable() : { Token t; } +{ + t = < DOLLAR_QNAME > { return t.image; } +} + +String template() : { Token t; } +{ + t = < PERCENT_QNAME > { return t.image; } } \ No newline at end of file diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ft.jjt b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ft.jjt new file mode 100644 index 0000000..c773d54 --- /dev/null +++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/parser/ft.jjt @@ -0,0 +1,520 @@ +/** +* JJTree file +* NODE_PACKAGE = "eu.dnetlib.data.collective.transformation.rulelanguage.node"; +*/ + +options { + STATIC=false; + MULTI = true; + VISITOR = true; + JDK_VERSION = "1.6"; +} + +PARSER_BEGIN(FtScript) + +package eu.dnetlib.data.collective.transformation.rulelanguage.parser; +import eu.dnetlib.data.collective.transformation.rulelanguage.Rules; +public class FtScript { +} + +PARSER_END(FtScript) + +SKIP : +{ + " " +| "\t" +| "\n" +| "\r" +| <"//" (~["\n","\r"])* ("\n"|"\r"|"\r\n")> +| <"/*" (~["*"])* "*" (~["/"] (~["*"])* "*")* "/"> +} + +MORE : { + "//" : IN_SINGLE_LINE_COMMENT } + +< IN_SINGLE_LINE_COMMENT > +SPECIAL_TOKEN : +{ + : DEFAULT +} + +/* RESERVED WORDS AND LITERALS */ +// KEYWORDS +TOKEN : { < IMPORT: "import" > } +TOKEN : { < TRANS: "trans" > } +TOKEN : { < IF: "if" > } +TOKEN : { < ELSE: "else" > } +TOKEN : { < END: "end" > } +TOKEN : { < EXTRACT: "Extract" > } +TOKEN : { < 
IDENTIFIEREXTRACT: "identifierExtract" > } +TOKEN : { < CONVERT: "Convert" > } +TOKEN : { < REGEXPR: "RegExpr" > } +TOKEN : { < GETVALUE: "getValue" > } +TOKEN : { < COPY: "copy" > } +TOKEN : { < DECLARE_NAMESPACE: "declare_ns" > } +TOKEN : { < DECLARE_SCRIPT: "declare_script" > } +TOKEN : { < DECLARE_SUBSCRIPT: "declare_subscript" > } +TOKEN : { < PREPROCESS: "preprocess" > } +TOKEN : { < SET: "set" > } +TOKEN : { < SKIPRECORD: "skipRecord" > } +TOKEN : { < SPLIT: "split" > } +TOKEN : { < STATIC: "static" > } +TOKEN : { < XPATH_SCHEME: "xpath:" > } +TOKEN : { < APPLY: "apply" > } +TOKEN : { < EMPTY: "empty" > } +TOKEN : { < DBLOOKUP: "dblookup" > } +TOKEN : { < BLACKLIST: "blacklist" > } +TOKEN : { < LOOKUP: "lookup" > } +TOKEN : { < CONCAT: "concat" > } + +// SPEC. CHARS +TOKEN : { < SEMICOLON: ";" > } +TOKEN : { < RBRACKET: "]" > } +TOKEN : { < LBRACE: "{" > } +TOKEN : { < RBRACE: "}" > } +TOKEN : { < COMMA: "," > } +TOKEN : { < RPAREN: ")" > } +TOKEN : { < QUOTE: "'" > } + +TOKEN : +{ + + < EQUAL: "=" > | + < NOTEQUAL: "!=" > | + < GT: " > " | ">" > | + < LT: " < " | "<" > | + < GTE: " >= " | ">=" > | + < LTE: " <= " | "<=" > | + < PLUS: "+" > | + < VBAR: "|" > | +// < SLASH_SLASH: "//" > | + < SLASH: "/" > | + + < DOT_DOT: ".." > | + //< DOT: "." > | + + < AT: "@" > | + + < LPAREN: "(" > | + < LBRACKET: "[" > | + < DOLLAR_QNAME: "$" > | + < PERCENT_QNAME: "%" < IDENTIFIER > > + +} + +TOKEN : +{ + < XPATH: + < XPATH_SCHEME > + > +} + +TOKEN : +{ + < JOBCONST: + < JOBCONST_PREFIX > ()* > +| < #JOBCONST_PREFIX: "$job." > } + +TOKEN: { + < QUOTED_STRING: "\"" (~["\""] )+ "\"" > } + +TOKEN: +{ + < SINGLE_QUOTE: "'" + (~["'"] + )+ "'" > } + +TOKEN : /* STRING LITERALS*/ +{ + < CHARACTER_LITERAL: + "'" + ( (~["'","\\","\n","\r"]) + | ("\\" + ( ["n","t","b","r","f","\\","'","\""] + | ["0"-"7"] ( ["0"-"7"] )? 
+ | ["0"-"3"] ["0"-"7"] ["0"-"7"] + ) + ) + ) + "'" + > +| + < STRING_LITERAL: + "\"" + ( (~["\"", "\\", "\n", "\r"]) + | ("\\" + ( ["n", "t", "b", "r", "f", "\\", "'", "\""] + | ["0"-"7"] ( ["0"-"7"] )? + | ["0"-"3"] ["0"-"7"] ["0"-"7"] + ) + ) + )* + "\"" + > +} + + +TOKEN : { + < URI: + < IDENTIFIER > "://" ( "/" < IDENTIFIER > )* ( "/" )? + > } + + + +TOKEN : /* IDENTIFIERS */ +{ + < IDENTIFIER: + ( < AT >|) (||< DOT >|< COLON >|< AT >)* > +| < #LETTER_OR_DIGIT: ["_","a"-"z","A"-"Z","0"-"9"] > +| < #MINUS: "-" > +| < #DOT: "." > +| < #COLON: ":" > +//| < #AT: "@" > +} + + + +ASTStart Start() :{} +{ + (script()) + { return jjtThis; } +} + +/** + * id = value + */ +void assign(Rules r) #MyAssign : {String value = ""; Token t;} +{ + {jjtThis.setRule(r);} + (t = inputField() {jjtThis.setFieldExpression(t.image);} + |value = quotedString() {jjtThis.setAttribute(value);} + ) +} + +void attrib_list() #void : {} +{ + + (attribute())* + +} + + +void attribute() #MyAttribute : {String value; Token t=null;} +{ + (value = identifier() { jjtThis.setAttributeValue(value);} + |t = inputField() + { value = t.image; + jjtThis.setAttributeInputField(value); + } + ) + [] + +} + +/** + * a conditional Rule, which contains the condition, the Rule on which the condition holds and the alternative rule + */ +void conditionalStmt() #MyCondition : {Rules r1 = new Rules(); Rules r2 = new Rules(); Token cond; Token apply;} +{ + [< APPLY > apply = inputField() { + jjtThis.setApplyExpression(apply.image); } ] + < IF > cond = inputField() { jjtThis.setConditionalExpression(cond.image); jjtThis.setPrimaryRule(r1); jjtThis.setSecondaryRule(r2); } + rule(r1) + < ELSE > + rule(r2) +} + +void empty(Rules r) #MyEmpty : {} +{ + {jjtThis.setRule(r);} + < EMPTY > {jjtThis.setEmpty(true);} +} + +String identifier() #void : {Token t;} +{ + t = {return t.image;} +} + +void importDeclaration() #MyImport : +{String scriptName;} +{ + scriptName = identifier() ";" { jjtThis.setScriptName(scriptName); } +} + 
+Token anyExpression() #void : { Token t; } +{ + t = < IDENTIFIER > { + return t; } } + +Token inputField() #void :{ Token t; } +{ + t = < XPATH > + { + return t; + } + | t = < JOBCONST > { + return t; } + | t = < DOLLAR_QNAME > + { + return t; + } +} + + +/* + * becomes obsolete + */ +String key() #void : {String key;} +{ + key = identifier() + + {return key;} +} + +void literal() #void : +{} +{ + < CHARACTER_LITERAL > | < STRING_LITERAL > +} + +void nsDeclaration() #MyNs : +{String nsPrefix; String nsUri;} +{ + < DECLARE_NAMESPACE > nsPrefix = identifier() < EQUAL > nsUri = quotedString() < SEMICOLON > + { + jjtThis.setNsDeclaration(nsPrefix, nsUri); + } +} + +void preprocess() #MyPreprocess :{String preprocessId;} { + ( < PREPROCESS > + preprocessId = identifier() < EQUAL > + < DBLOOKUP > + < LPAREN > { String sqlExpr; } + sqlExpr = quotedString() + < RPAREN > { jjtThis.preprocess(preprocessId, "dblookup", sqlExpr); } + < SEMICOLON > + ) | + ( + < BLACKLIST > + < LPAREN > { String blacklistDataSourceId; } + blacklistDataSourceId = quotedString() + < RPAREN > { jjtThis.preprocess("blacklist", blacklistDataSourceId); } + < SEMICOLON > ) } + +void set(Rules r) #MySet : {Token expr; String value = "";} +{ + { jjtThis.setRule(r); } + + < SET > + < LPAREN > + ( + expr = inputField() { jjtThis.setValueExpression(expr.image); } + | value = quotedString() { jjtThis.setAttribute(value); } + ) + ( + < COMMA > + { + r = new Rules(); + } + rule(r) + { + jjtThis.addAttributeRule(r); + } + + )* + < RPAREN > +} + +void skip(Rules r) #MySkip :{} +{ + { jjtThis.setRule(r); } + < SKIPRECORD > {jjtThis.skipRecord();} + < LPAREN > + < RPAREN > } + +void copy(Rules r) #MyCopy : {} +{ { jjtThis.setRule(r); } + < COPY > + < LPAREN > { String templateMatchExpression; String applyTemplateSelectExpression; String copySelectExpression; } + templateMatchExpression = quotedString() + < COMMA > + applyTemplateSelectExpression = quotedString() + < COMMA > + copySelectExpression = 
quotedString() + < RPAREN > { jjtThis.copy(templateMatchExpression, applyTemplateSelectExpression, copySelectExpression); } + +} + +void op(Rules r) #MyOp : {String functionName = ""; Token expr; Token expr2; Token vocab; String defaultPattern = ""; String function = ""; String elementName = ""; String regExpr; Token feature;} +{ + {jjtThis.setRule(r);} + ( ( + + functionName = identifier() attrib_list() + {jjtThis.getValue(functionName);} + ) + | ( + + expr = inputField() + + vocab = + ( + + defaultPattern = quotedString() + + function = quotedString() )* + {jjtThis.convert(expr.image, vocab.image, defaultPattern, function);} + ) + | (< EXTRACT > + < LPAREN > + feature = < IDENTIFIER > + < RPAREN > {jjtThis.extract(feature.image); } ) + | ( + + expr = inputField() + + expr2 = inputField() + + regExpr = quotedString() + {jjtThis.regExpr(expr.image, expr2.image, regExpr);} + ) + | ( + < LPAREN > + expr = inputField() + < COMMA > + elementName = quotedString() + < COMMA > + regExpr = quotedString() + < RPAREN > {jjtThis.split(expr.image, elementName, regExpr);} + ) + | ( < LOOKUP > { String propertyKey; } + < LPAREN > + expr = inputField() + < COMMA > + propertyKey = quotedString() + < RPAREN > { jjtThis.lookup(expr.image, propertyKey); } + ) + | ( < IDENTIFIEREXTRACT > { String xpathExprJsonString; Token xpathExprInputSource; } + < LPAREN > + // "{//abc, //def }" ?json list of xpath-expr // xpath-expr of input source + // regExpr xpathExprJsonString = singleQuotedString() + < COMMA > + xpathExprInputSource = inputField() + < COMMA > + regExpr = singleQuotedString() + < RPAREN > { jjtThis.identifierExtract(xpathExprJsonString, xpathExprInputSource.image, regExpr); } + ) + | ( < CONCAT > { jjtThis.concat(); } + < LPAREN > { String v; Token t; } + ( v = quotedString() { jjtThis.addConcat(v); } | t = < DOLLAR_QNAME > { jjtThis.addConcat(t.image); } + ) + ( + < COMMA > + ( v = quotedString() { jjtThis.addConcat(v); } | t = < DOLLAR_QNAME > { jjtThis.addConcat(t.image); } 
+ ) )* + < RPAREN > ) + ) +} + +String leftExprOutputField() #void :{ String leftExpr; } +{ + leftExpr = identifier() < EQUAL > { return leftExpr; } +} + +String leftExprVar() #void :{ String leftExpr; } { + leftExpr = variable() < EQUAL > { return leftExpr; } } + +String leftExprTemplate() #void :{ String leftExpr; } +{ + leftExpr = template() < EQUAL > { return leftExpr; } } + +String singleQuotedString() #void : { Token t;} { + t = + < SINGLE_QUOTE > + { return t.image; } } + +String quotedString() #void : {Token t;} +{ + t = + < QUOTED_STRING > + {return t.image;} +} + +void rule(Rules r) #void : { String ruleDecl; String leftExpr;} +{ + [ ruleDecl = ruleDecl() { r.setRuleDeclaration(ruleDecl); } ] + ( leftExpr = leftExprOutputField() + { + String[] fieldArray = leftExpr.split("@"); + r.setTargetField(fieldArray[0]); + if (fieldArray.length > 1) { + r.setAttribute(fieldArray[1]); } + } + | leftExpr = leftExprVar() { r.setVariable(leftExpr); } + | leftExpr = leftExprTemplate() { r.setTemplate(leftExpr); } ) + ( + LOOKAHEAD(2) + + assign(r) + | op(r) + | set(r) + | copy(r) + | empty(r) + | skip(r) + ) < SEMICOLON > +} + +String ruleDecl() #void : {Token t;} +{ + ( + t = < STATIC > + ) + { return t.image; + } +} + +void script() #void :{} +{ + scriptDeclaration() + [ importDeclaration() ] + ( + nsDeclaration() + )* + ( + preprocess() )* + ( stmt() | conditionalStmt() )* + ( ) +} + +void scriptDeclaration() #MyScript : +{String scriptName; ASTMyScript.SCRIPTTYPE scriptType;} +{ + + ( < DECLARE_SCRIPT > { + jjtThis.setScriptType(ASTMyScript.SCRIPTTYPE.MAINSCRIPT); } + | < DECLARE_SUBSCRIPT > { + jjtThis.setScriptType(ASTMyScript.SCRIPTTYPE.SUBSCRIPT); } ) scriptName = quotedString() < SEMICOLON > + { + jjtThis.setScript(scriptName); + } +} + +void stmt() #void :{Rules r = new Rules();} +{ + rule(r) +} + +String string() #void : {Token t;} +{ + t = {return t.image.substring(1, t.image.length()-1);} +} + +String variable() #void : { Token t; } +{ + t = < DOLLAR_QNAME 
/**
 * Static helpers that turn fragments of the transformation rule language
 * (label expressions, {@code xpath:"..."} expressions, quoted strings and
 * namespace maps) into the strings used when XSL is generated.
 *
 * @author jochen
 */
public class Converter {

    /** Prefix that introduces an xpath expression in a production rule, opening quote included. */
    private static final String XPATH_PREFIX = "xpath:\"";

    /** Function names that {@link #isXpathReturningString(String)} recognises as string-valued. */
    private static final String[] STRING_FUNCTIONS = {"concat", "normalize-space", "translate", "substring"};

    /**
     * Builds {@code //<first token>:<third token>} from a dot-delimited label expression.
     * The second token is ignored; tokens beyond the third are ignored as well.
     *
     * @param aElement label expression consisting of (at least) three tokens delimited by dots
     * @return the xpath expression addressing the element
     * @throws IllegalArgumentException if fewer than three non-empty tokens are present
     */
    public static String getXpathFromLabelExpr(String aElement) {
        // StringTokenizer is kept on purpose: unlike String.split it never yields empty tokens.
        StringTokenizer tokenizer = new StringTokenizer(aElement, ".");
        List<String> tokens = new LinkedList<String>();
        while (tokenizer.hasMoreTokens()) {
            tokens.add(tokenizer.nextToken());
        }
        if (tokens.size() < 3) {
            throw new IllegalArgumentException(
                    "label expression must consist of 3 tokens delimited by dot, got: " + aElement);
        }
        return "//" + tokens.get(0) + ":" + tokens.get(2);
    }

    /**
     * Extracts an xpath expression made in a production rule, i.e. the text between
     * the leading {@code xpath:"} and the last character of the argument.
     *
     * @param aElement the rule fragment
     * @return the xpath expression, or the empty string if the argument does not start with
     *         {@code xpath:"} or contains nothing after that prefix
     */
    public static String getXpathFromXpathExpr(String aElement) {
        // The length guard covers the bare prefix, for which substring(7, 6) would throw.
        if (!aElement.startsWith(XPATH_PREFIX) || aElement.length() == XPATH_PREFIX.length()) {
            return "";
        }
        return aElement.substring(XPATH_PREFIX.length(), aElement.length() - 1);
    }

    /**
     * Tells whether an xpath expression starts with one of the function names
     * {@code concat}, {@code normalize-space}, {@code translate} or {@code substring}.
     *
     * @param aXpathExpr the xpath expression to inspect
     * @return true if the expression starts with one of those names
     */
    public static boolean isXpathReturningString(String aXpathExpr) {
        for (String function : STRING_FUNCTIONS) {
            if (aXpathExpr.startsWith(function)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Drops the first and the last character of the argument.
     * The argument must be at least two characters long; shorter input makes
     * {@link String#substring(int, int)} throw {@link StringIndexOutOfBoundsException}.
     *
     * @param aValue a value enclosed in one leading and one trailing delimiter character
     * @return the value without its first and last character
     */
    public static String getUnquotedString(String aValue) {
        return aValue.substring(1, aValue.length() - 1);
    }

    /**
     * Returns a list of name-space declarations used in xsl, one
     * {@code xmlns:prefix="uri" } entry (with trailing blank) per map entry,
     * in the iteration order of the map.
     *
     * @param nsPrefixMap a map of name-space prefixes and their uris
     * @return list of name-space declarations
     */
    public static List<String> getBoundPrefixes(Map<String, String> nsPrefixMap) {
        List<String> declarations = new LinkedList<String>();
        for (Map.Entry<String, String> ns : nsPrefixMap.entrySet()) {
            declarations.add("xmlns:" + ns.getKey() + "=" + "\"" + ns.getValue() + "\" ");
        }
        return declarations;
    }
}
extending this class + * @author jochen + * + */ +public class FunctionCall { + + private String externalFunctionName; + private Map paramMap; + private List paramList; + private String uuid; + private List argList = new LinkedList(); + private boolean isStatic = false; + private boolean doPreprocess = true; + + public FunctionCall(boolean aIsStatic){ + uuid = UUID.randomUUID().toString(); + this.isStatic = aIsStatic; + } + + public FunctionCall(boolean aIsStatic, boolean aDoPreprocess){ + this(aIsStatic); + this.doPreprocess = aDoPreprocess; + } + + public boolean doPreprocess(){ + return this.doPreprocess; + } + + public String getXSLpreparatedFunctionCall(){ + return XsltConstants.extFuncNS + ":" + externalFunctionName + "(" + "$tf" + ", '" + uuid + "', $index" + ")"; + } + + public String getXSLpositionFunctionCall(){ + return XsltConstants.extFuncNS + ":" + externalFunctionName + "(" + "$tf" + ", '" + uuid + "', $index" + ", $posVar" + ")"; + } + + public String getXSLdirectFunctionCall(String aCallId){ + if (externalFunctionName.equals("regExpr")){ + return XsltConstants.extFuncNS + ":" + externalFunctionName + "(" + "$tf" + ", " + this.paramMap.get(RegularExpression.paramExpr1) + ", " + this.paramMap.get(RegularExpression.paramExpr2) + ", '" + this.paramMap.get(RegularExpression.paramRegularExpr) + "')"; + }else if (externalFunctionName.equals("convert")){ + if (this.paramMap.containsKey(Convert.paramDefaultPattern) && this.paramMap.containsKey(Convert.paramFunction)) + return XsltConstants.extFuncNS + ":" + externalFunctionName + "(" + "$tf" + ", " + this.paramMap.get(Convert.paramFieldValue) + ", '" + this.paramMap.get(Convert.paramVocabularyName) + "', '" + this.paramMap.get(Convert.paramDefaultPattern) + "', '" + this.paramMap.get(Convert.paramFunction) + "')"; + else + return XsltConstants.extFuncNS + ":" + externalFunctionName + "(" + "$tf" + ", " + this.paramMap.get(Convert.paramFieldValue) + ", '" + this.paramMap.get(Convert.paramVocabularyName) + 
"')"; + }else if (externalFunctionName.equals("convertString")){ + if (this.paramMap.containsKey(Convert.paramDefaultPattern) && this.paramMap.containsKey(Convert.paramFunction)) + return XsltConstants.extFuncNS + ":" + externalFunctionName + "(" + "$tf" + ", " + this.paramMap.get(Convert.paramFieldValue) + ", '" + this.paramMap.get(Convert.paramVocabularyName) + "', '" + this.paramMap.get(Convert.paramDefaultPattern) + "', '" + this.paramMap.get(Convert.paramFunction) + "')"; + else + return XsltConstants.extFuncNS + ":" + externalFunctionName + "(" + "$tf" + ", " + this.paramMap.get(Convert.paramFieldValue) + ", '" + this.paramMap.get(Convert.paramVocabularyName) + "')"; + }else if (externalFunctionName.equals("split")){ + return XsltConstants.extFuncNS + ":" + externalFunctionName + "(" + "$tf" + ", " + this.paramMap.get(Split.paramInputExpr) + ", '" + this.paramMap.get(Split.paramRegExpr) + "', '" + aCallId + "')"; + }else if (externalFunctionName.equals("lookup")){ + return XsltConstants.extFuncNS + ":" + externalFunctionName + "(" + "$tf" + ", " + this.paramMap.get(Lookup.paramExprIdentifier) + ", '" + this.paramMap.get(Lookup.paramExprProperty) + "')"; + }else if (externalFunctionName.equals("identifierExtract")){ + return XsltConstants.extFuncNS + ":" + externalFunctionName + "(" + "$tf" + ", '" + StringEscapeUtils.escapeXml10(this.paramMap.get(IdentifierExtract.paramXpathExprJson)) + "', " + this.paramMap.get(IdentifierExtract.paramXpathExprInSource) + ", '" + StringEscapeUtils.escapeXml10(this.paramMap.get(IdentifierExtract.paramRegExpr)) + "')"; + }else{ + throw new IllegalStateException("unsupported function call: " + externalFunctionName); + } + } + + public String getXSLdirectFunctionCallById(String aCallId){ + if (externalFunctionName.equals("split")){ + return XsltConstants.extFuncNS + ":" + externalFunctionName + "(" + "$tf" + ", '" + aCallId + "')"; + }else{ + throw new IllegalStateException("unsupported function call: " + externalFunctionName); + 
} + } + + public void setExternalFunctionName(String externalFunctionName) { + this.externalFunctionName = externalFunctionName; + } + + public String getExternalFunctionName() { + return externalFunctionName; + } + + public void addArgument(Argument arg){ + this.argList.add(arg); + } + + public void setArguments(List aArgList){ + this.argList = aArgList; + } + + public List getArguments(){ + return this.argList; + } + + public void setParameters(Map parameters) { + this.paramMap = parameters; + } + + public Map getParameters() { + return paramMap; + } + + public String getUuid() { + return uuid; + } + + /** + * @param isStatic the isStatic to set + */ + public void setStatic(boolean isStatic) { + this.isStatic = isStatic; + } + + /** + * @return the isStatic + */ + public boolean isStatic() { + return isStatic; + } + + /** + * @return the paramList + */ + public List getParamList() { + return paramList; + } + + /** + * @param paramList the paramList to set + */ + public void setParamList(List paramList) { + this.paramList = paramList; + } +} diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/visitor/AbstractVisitor.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/visitor/AbstractVisitor.java new file mode 100644 index 0000000..712c15c --- /dev/null +++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/visitor/AbstractVisitor.java @@ -0,0 +1,58 @@ +package eu.dnetlib.data.collective.transformation.rulelanguage.visitor; + +import eu.dnetlib.data.collective.transformation.rulelanguage.parser.ASTMyAssign; +import eu.dnetlib.data.collective.transformation.rulelanguage.parser.ASTMyAttribute; +import eu.dnetlib.data.collective.transformation.rulelanguage.parser.ASTMyOp; +import eu.dnetlib.data.collective.transformation.rulelanguage.parser.ASTStart; +import eu.dnetlib.data.collective.transformation.rulelanguage.parser.FtScriptVisitor; 
+import eu.dnetlib.data.collective.transformation.rulelanguage.parser.SimpleNode; + +public abstract class AbstractVisitor implements FtScriptVisitor { + + /* (non-Javadoc) + * @see examples.jjtree.FtScriptVisitor#visit(examples.jjtree.ASTMyAssign, java.lang.Object) + */ + @Override + public Object visit(ASTMyAssign node, Object data) { + // TODO Auto-generated method stub + return null; + } + + /* (non-Javadoc) + * @see examples.jjtree.FtScriptVisitor#visit(examples.jjtree.ASTMyAttribute, java.lang.Object) + */ + @Override + public Object visit(ASTMyAttribute node, Object data) { + // TODO Auto-generated method stub + return null; + } + + + /* (non-Javadoc) + * @see examples.jjtree.FtScriptVisitor#visit(examples.jjtree.ASTMyOp, java.lang.Object) + */ + @Override + public Object visit(ASTMyOp node, Object data) { + // TODO Auto-generated method stub + return null; + } + + /* (non-Javadoc) + * @see examples.jjtree.FtScriptVisitor#visit(examples.jjtree.ASTStart, java.lang.Object) + */ + @Override + public Object visit(ASTStart node, Object data) { + // TODO Auto-generated method stub + return null; + } + + /* (non-Javadoc) + * @see examples.jjtree.FtScriptVisitor#visit(examples.jjtree.SimpleNode, java.lang.Object) + */ + @Override + public Object visit(SimpleNode node, Object data) { + // TODO Auto-generated method stub + return null; + } + +} diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/visitor/RuleLanguageVisitor.java b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/visitor/RuleLanguageVisitor.java new file mode 100644 index 0000000..ef41552 --- /dev/null +++ b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/rulelanguage/visitor/RuleLanguageVisitor.java @@ -0,0 +1,306 @@ +package eu.dnetlib.data.collective.transformation.rulelanguage.visitor; + +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import 
java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import eu.dnetlib.data.collective.transformation.rulelanguage.Argument; +import eu.dnetlib.data.collective.transformation.rulelanguage.Condition; +import eu.dnetlib.data.collective.transformation.rulelanguage.IRule; +import eu.dnetlib.data.collective.transformation.rulelanguage.Rules; +import eu.dnetlib.data.collective.transformation.rulelanguage.RulesSet; +import eu.dnetlib.data.collective.transformation.rulelanguage.Argument.Type; +import eu.dnetlib.data.collective.transformation.rulelanguage.parser.ASTMyAssign; +import eu.dnetlib.data.collective.transformation.rulelanguage.parser.ASTMyAttribute; +import eu.dnetlib.data.collective.transformation.rulelanguage.parser.ASTMyCondition; +import eu.dnetlib.data.collective.transformation.rulelanguage.parser.ASTMyCopy; +import eu.dnetlib.data.collective.transformation.rulelanguage.parser.ASTMyEmpty; +import eu.dnetlib.data.collective.transformation.rulelanguage.parser.ASTMyImport; +import eu.dnetlib.data.collective.transformation.rulelanguage.parser.ASTMyNs; +import eu.dnetlib.data.collective.transformation.rulelanguage.parser.ASTMyOp; +import eu.dnetlib.data.collective.transformation.rulelanguage.parser.ASTMyPreprocess; +import eu.dnetlib.data.collective.transformation.rulelanguage.parser.ASTMyScript; +import eu.dnetlib.data.collective.transformation.rulelanguage.parser.ASTMyScript.SCRIPTTYPE; +import eu.dnetlib.data.collective.transformation.rulelanguage.parser.ASTMySet; +import eu.dnetlib.data.collective.transformation.rulelanguage.parser.ASTMySkip; +import eu.dnetlib.data.collective.transformation.rulelanguage.util.Converter; +import eu.dnetlib.data.collective.transformation.rulelanguage.util.FunctionCall; + +/** + * Implementation of the visitor pattern; maps production rules into Java data structures + * @author jochen + * + */ +public 
class RuleLanguageVisitor extends AbstractVisitor{ + + private static final Log log = LogFactory.getLog(RuleLanguageVisitor.class); + + private String scriptName = ""; + private ASTMyScript.SCRIPTTYPE scriptType; + private Map> elementMappingRules = new LinkedHashMap>(); + private Map variableMappingRules = new LinkedHashMap(); + private Map templateMappingRules = new LinkedHashMap(); + private List importedScriptList = new LinkedList(); + private List functionCallList = new LinkedList(); + private Map namespaceDeclMap = new HashMap(); + private List> preprocessingMap = new LinkedList>(); + + + /** + * @return the name of the rule script + */ + public String getScriptName(){ + return this.scriptName; + } + + /** + * @return the type of the script + */ + public SCRIPTTYPE getScriptType(){ + return this.scriptType; + } + + /** + * @return the mapping of all rules + */ + public Map> getElementMappingRules(){ + return this.elementMappingRules; + } + + public Map getVariableMappingRules(){ + return this.variableMappingRules; + } + + public Map getTemplateMappingRules(){ + return this.templateMappingRules; + } + + /** + * @return the list of function calls - this is a subset of the rule mapping + */ + public List getFunctionCalls(){ + return this.functionCallList; + } + + /** + * @return the list of scripts that are declared as import + */ + public List getImportedScripts(){ + return this.importedScriptList; + } + + /** + * @return the map of name-space declarations made in the script + */ + public Map getNamespaceDeclarations(){ + return this.namespaceDeclMap; + } + + /** + * @return the map of preprocessings (functions, parameters) + */ + public List> getPreprocessings(){ + return this.preprocessingMap; + } + + public Object visit(ASTMyAssign node, Object data) { + String attrValue = ""; + String fieldExprValue = ""; + Rules r = node.getRule(); + + if (node.isFieldExpression()){ + // todo e.g. 
convert field-expression into a xpath-expression + fieldExprValue = node.getFieldExpression(); + if (fieldExprValue.startsWith("xpath:")){ + fieldExprValue = Converter.getXpathFromXpathExpr(fieldExprValue); + r.setXpath(fieldExprValue); + }else if (fieldExprValue.startsWith("$") && !fieldExprValue.startsWith("$job.")){ + // variable + log.debug("ruleLangVisitor: assign variable:" + fieldExprValue); + r.setAssignmentVariable(fieldExprValue); + } + }else if (node.isAttribute()){ + attrValue = node.getValue(); + }else { + // shouldn't happen + attrValue = "value not defined: " + node.getValue(); + } + r.setConstant(attrValue); + if (r.getUniqueName().trim().length() > 0){ + addRule(r, r.getUniqueName()); + } + return null; + } + + @Override + public Object visit(ASTMyCopy node, Object data) { + Rules r = node.getRule(); + r.setTemplateMatch(node.getTemplateMatchName()); + r.getProperties().setProperty("applyTemplateSelectExpression", node.getApplyTemplateSelectExpression()); + r.getProperties().setProperty("copySelectExpression", node.getCopySelectExpression()); + addRule(r, r.getUniqueName()); + return null; + } + + @Override + public Object visit(ASTMySet aNode, Object aData) { + log.debug("called method: RuleLanguageVisitor.visit(ASTMySet)"); + // check if the outputfield is the same in rules of this production + Rules myRule = aNode.getRule(); + + if (aNode.isValueExpression()){ + // todo e.g. 
convert field-expression into a xpath-expression + String exprValue = aNode.getValueExpression(); + if (exprValue.startsWith("xpath:")){ + exprValue = Converter.getXpathFromXpathExpr(exprValue); + myRule.setXpath(exprValue); + }else if (exprValue.startsWith("$") && !exprValue.startsWith("$job.")){ + // variable + log.debug("ruleLangVisitor: assign variable:" + exprValue); + myRule.setAssignmentVariable(exprValue); + } + } + + List rules = aNode.getRules(); + RulesSet set = new RulesSet(); + //set.setPrimaryRule(rules.get(0)); + myRule.setRulesSet(set); + log.debug("rulelangvisitor rule name: " + myRule.getUniqueName() + " , hasSet : " + myRule.hasSet()); + set.getPendingRules().addAll(rules); + addRule(myRule, myRule.getUniqueName()); + return null; + } + + @Override + public Object visit(ASTMyEmpty node, Object data) { + Rules r = node.getRule(); + r.setEmpty(node.isEmpty()); + addRule(r, r.getUniqueName()); + return null; + } + + + /** + * vist a production rule that is defined as an operation or external function call + * @see eu.dnetlib.data.collective.transformation.rulelanguage.visitor.AbstractVisitor#visit(eu.dnetlib.data.collective.transformation.rulelanguage.parser.ASTMyOp, java.lang.Object) + */ + public Object visit(ASTMyOp node, Object data) { + Rules r = node.getRule(); + r.setFunctionCall(node.createFunctionCall(r.isStatic())); + functionCallList.add(r.getFunctionCall()); + log.debug("fc name: " + r.getFunctionCall().getExternalFunctionName()); + for(int i =0; i < node.jjtGetNumChildren(); i++) { + ASTMyAttribute sn = (ASTMyAttribute)node.jjtGetChild(i); + Argument arg = null; + if (sn.getAttributeValue() != null){ + arg = new Argument(Type.VALUE, sn.getAttributeValue()); + }else if (sn.getAttributeInputField() != null){ + if (sn.getAttributeInputField().startsWith("xpath:")){ + arg = new Argument(Type.INPUTFIELD, Converter.getXpathFromXpathExpr(sn.getAttributeInputField())); + }else if (sn.getAttributeInputField().startsWith("$job.")){ + // job 
constant + arg = new Argument(Type.JOBCONST, sn.getAttributeInputField()); + }else{ + // variable + arg = new Argument(Type.VAR, sn.getAttributeInputField()); + } + }else{ + throw new IllegalStateException("Argument with neither value nor inputfield is not allowed."); + } + log.debug("argument: " + arg.getArgument()); + r.getFunctionCall().addArgument(arg); + } + if (r.getFunctionCall().getParameters() != null){ + Set keys = r.getFunctionCall().getParameters().keySet(); + for (String key: keys){ + log.debug("key: " + key + " , value: " + r.getFunctionCall().getParameters().get(key)); + } + } + log.debug("add rule with declaration: " + r.getRuleDeclaration()); + addRule(r, r.getUniqueName()); + + return null; + } + + @Override + public Object visit(ASTMyImport node, Object data) { + importedScriptList.add(node.getScriptName()); + return null; + } + + @Override + public Object visit(ASTMyNs node, Object data) { + namespaceDeclMap.put(node.getNsPrefix(), node.getNsUri()); + return null; + } + + @Override + public Object visit(ASTMyScript node, Object data) { + this.scriptName = node.getScript(); + this.scriptType = node.getScriptType(); + return null; + } + + @Override + public Object visit(ASTMyCondition node, Object data) { + Condition condition = new Condition(); + if (node.getApplyExpression().length() > 0){ + String applyExpr = Converter.getXpathFromXpathExpr(node.getApplyExpression()); + condition.setApplyExpression(applyExpr); + } + + String conditionalExpr = Converter.getXpathFromXpathExpr(node.getConditionalExpression()); + condition.setConditionExpression(conditionalExpr); + condition.setPrimaryRule(node.getPrimaryRule()); + node.getPrimaryRule().setCondition(condition); + condition.setSecondaryRule(node.getSecondaryRule()); + node.getSecondaryRule().setCondition(condition); + return null; + } + + @Override + public Object visit(ASTMyPreprocess node, Object data) { + Map functionMap = new HashMap(); + functionMap.put(node.getFunctionName(), 
/**
 * Reads a blacklist from a URL, one entry per line.
 */
public class BlacklistConsumer {

	/**
	 * Fetches the resource at {@code apiURL} and returns its lines in the order they were read.
	 * An empty response yields an empty list.
	 *
	 * @param apiURL the URL of the blacklist resource
	 * @return the lines of the response, never {@code null}
	 * @throws IllegalStateException if the URL is malformed or the resource cannot be read; the
	 *         underlying {@link IOException} is attached as cause
	 */
	public List<String> getBlackList(String apiURL){
		List<String> blacklist = new LinkedList<String>();
		// try-with-resources closes the reader (and the stream it wraps) on every exit path
		try (BufferedReader reader = new BufferedReader(new InputStreamReader(new URL(apiURL).openStream()))) {
			String line;
			while ((line = reader.readLine()) != null) {
				blacklist.add(line);
			}
		} catch (IOException e) {
			throw new IllegalStateException("error in blacklist api: " + e.getMessage(), e);
		}
		return blacklist;
	}
}
/**
 * Mutable {@link NamespaceContext} backed by a prefix-to-URI map that is filled through
 * {@link #addNamespace(String, String)}. The reserved {@code xml} and {@code xmlns} bindings are
 * always resolved to their fixed values.
 */
public class NamespaceContextImpl implements NamespaceContext {

	private Map<String, String> nsMap = new HashMap<String, String>();

	/**
	 * Binds a prefix to a name-space URI, replacing any earlier binding of the same prefix.
	 */
	public void addNamespace(String aPrefix, String aURI){
		nsMap.put(aPrefix, aURI);
	}

	/**
	 * @param aPrefix the prefix to resolve
	 * @return the bound URI; unregistered prefixes still yield {@code null}, as before, so existing callers are unaffected
	 * @throws IllegalArgumentException if {@code aPrefix} is {@code null}
	 */
	@Override
	public String getNamespaceURI(String aPrefix) {
		if (aPrefix == null){
			throw new IllegalArgumentException("prefix must not be null");
		}
		if (XMLConstants.XML_NS_PREFIX.equals(aPrefix)){
			return XMLConstants.XML_NS_URI;
		}
		if (XMLConstants.XMLNS_ATTRIBUTE.equals(aPrefix)){
			return XMLConstants.XMLNS_ATTRIBUTE_NS_URI;
		}
		return nsMap.get(aPrefix);
	}

	/**
	 * @param aNamespaceURI the URI to look up
	 * @return one prefix bound to the URI, or {@code null} if no prefix is bound to it
	 * @throws IllegalArgumentException if {@code aNamespaceURI} is {@code null}
	 */
	@Override
	public String getPrefix(String aNamespaceURI) {
		Iterator<String> prefixes = getPrefixes(aNamespaceURI);
		return prefixes.hasNext() ? prefixes.next() : null;
	}

	/**
	 * @param aNamespaceURI the URI to look up
	 * @return a read-only iterator over every prefix bound to the URI; empty if there is none
	 * @throws IllegalArgumentException if {@code aNamespaceURI} is {@code null}
	 */
	@Override
	public Iterator<String> getPrefixes(String aNamespaceURI) {
		if (aNamespaceURI == null){
			throw new IllegalArgumentException("namespace URI must not be null");
		}
		if (aNamespaceURI.equals(XMLConstants.XML_NS_URI)){
			return java.util.Collections.singletonList(XMLConstants.XML_NS_PREFIX).iterator();
		}
		if (aNamespaceURI.equals(XMLConstants.XMLNS_ATTRIBUTE_NS_URI)){
			return java.util.Collections.singletonList(XMLConstants.XMLNS_ATTRIBUTE).iterator();
		}
		java.util.List<String> matches = new java.util.ArrayList<String>();
		for (Map.Entry<String, String> binding : nsMap.entrySet()){
			// compare from the non-null argument so a null URI stored in the map cannot throw
			if (aNamespaceURI.equals(binding.getValue())){
				matches.add(binding.getKey());
			}
		}
		return java.util.Collections.unmodifiableList(matches).iterator();
	}

}
b/dnet-data-services/src/main/java/eu/dnetlib/data/collective/transformation/utils/TransformationRulesImportTool.java @@ -0,0 +1,88 @@ +package eu.dnetlib.data.collective.transformation.utils; + +import java.io.StringReader; +import java.util.List; + +import org.apache.commons.lang3.StringEscapeUtils; + +import eu.dnetlib.common.profile.ProfileNotFoundException; +import eu.dnetlib.data.collective.transformation.rulelanguage.RuleLanguageParser; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; +import eu.dnetlib.enabling.tools.ServiceLocator; + +/** + * + * @author jochen + * @since 1.2 + */ +public class TransformationRulesImportTool { + private ServiceLocator lookupServiceLocator; + + /** + * retrieves the transformation rule script of a transformation rule profile identified by a profile id + * @param aProfileId + * @return list of the transformation rule script and optionally profile id's of subscripts + * @throws ProfileNotFoundException + */ + protected List getScript(String aProfileId) throws ProfileNotFoundException{ + String xquery = "collection('/db/DRIVER/TransformationRuleDSResources')//RESOURCE_PROFILE[.//RESOURCE_IDENTIFIER/@value ='" + + aProfileId + "']//CODE/child::node(), " + + "for $id in (collection('/db/DRIVER/TransformationRuleDSResources')//RESOURCE_PROFILE[.//RESOURCE_IDENTIFIER/@value ='" + + aProfileId + "']//IMPORTED/SCRIPT_REFERENCE/@id) return string($id)"; + List queryResult; + try { + queryResult = lookupServiceLocator.getService().quickSearchProfile(xquery); + if (!queryResult.isEmpty()){ + return queryResult; + }else{ + throw new ProfileNotFoundException("no script found in profile for profileId: " + aProfileId); + } + } catch (ISLookUpException e) { + throw new ProfileNotFoundException(e); + } + } + + protected void importRules(RuleLanguageParser aParser, String aProfileId) throws ProfileNotFoundException{ + List profileQueryResult = getScript(aProfileId); + 
String script = StringEscapeUtils.unescapeXml(profileQueryResult.get(0)); // the first entry contains the script + if (script.trim().startsWith(" { + + /** + * logger. + */ + private static final Log log = LogFactory.getLog(SimpleDataTransformer.class); + + /** + * Transformation rule profile + */ + private String ruleProfile; + private SimpleTransformationEngine transformationEngine; + + public SimpleDataTransformer(final String ruleProfile) { + this.ruleProfile = ruleProfile; + + // TODO + // instantiate here the xml transformer + + if (log.isDebugEnabled()) { + log.debug("************************************************************"); + log.debug("New transformer created from profile " + ruleProfile); + log.debug("************************************************************"); + } + + } + + @Override + public String evaluate(String record) { + if (log.isDebugEnabled()) { + log.debug("************************************************************"); + log.debug("INPUT: " + record); + log.debug("************************************************************"); + } + + final String output = transform(record); + + if (log.isDebugEnabled()) { + log.debug("************************************************************"); + log.debug("OUTPUT: " + output); + log.debug("************************************************************"); + } + + return output; + } + + private String transform(String record) { + // use here the xml transformer + return transformationEngine.transform(record); + } + + protected void setupEngine(VocabularyRegistry vocabularyRegistry, Resource transformationTemplate, + Resource defaultSchema, TransformationRulesImportTool rulesProfileUtil, ResourceDao resourceDao, Resource blacklistApi)throws TransformerConfigurationException, ProfileNotFoundException{ + transformationEngine = new SimpleTransformationEngine(); + transformationEngine.setVocabularyRegistry(vocabularyRegistry); + TransformationImpl transformation = new TransformationImpl(); + 
transformation.setSchema(defaultSchema); + transformation.setTemplate(transformationTemplate); + transformation.init(); + if (log.isDebugEnabled()) { + log.debug("************************************************************"); + log.debug(ruleProfile); + log.debug("************************************************************"); + } + transformation.setRuleLanguageParser(rulesProfileUtil.getRuleLanguageParser(ruleProfile)); + transformation.configureTransformation(); + transformationEngine.setTransformation(transformation); + transformationEngine.setResourceDao(resourceDao); + transformationEngine.setBlacklistApi(blacklistApi); + } + + public String getRuleProfile() { + return ruleProfile; + } + + public void setRuleProfile(String ruleProfile) { + this.ruleProfile = ruleProfile; + } + +} diff --git a/dnet-data-services/src/main/java/eu/dnetlib/data/transformation/service/TransformationServiceImpl.java b/dnet-data-services/src/main/java/eu/dnetlib/data/transformation/service/TransformationServiceImpl.java new file mode 100644 index 0000000..ddc0666 --- /dev/null +++ b/dnet-data-services/src/main/java/eu/dnetlib/data/transformation/service/TransformationServiceImpl.java @@ -0,0 +1,37 @@ +package eu.dnetlib.data.transformation.service; + +import javax.annotation.Resource; +import javax.xml.ws.wsaddressing.W3CEndpointReference; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import eu.dnetlib.data.transformation.service.rmi.TransformationService; +import eu.dnetlib.data.transformation.service.rmi.TransformationServiceException; +import eu.dnetlib.enabling.resultset.MappedResultSetFactory; +import eu.dnetlib.enabling.tools.AbstractBaseService; + +public class TransformationServiceImpl extends AbstractBaseService implements TransformationService { + + @Resource + private MappedResultSetFactory mappedResultSetFactory; + + @Resource + private DataTransformerFactory dataTransformerFactory; + + /** + * logger. 
+ */ + private static final Log log = LogFactory.getLog(TransformationServiceImpl.class); + + @Override + public W3CEndpointReference transform(String ruleid, W3CEndpointReference epr) throws TransformationServiceException { + try { + return mappedResultSetFactory.createMappedResultSet(epr, dataTransformerFactory.createTransformer(ruleid)); + } catch (Exception e) { + log.error("Error generating mapped resultset - ruleId: " + ruleid, e); + throw new TransformationServiceException("Error generating mapped resultset - ruleId: " + ruleid, e); + } + } + +} diff --git a/dnet-data-services/src/main/java/prototype/Person.java b/dnet-data-services/src/main/java/prototype/Person.java new file mode 100644 index 0000000..9ce3bb5 --- /dev/null +++ b/dnet-data-services/src/main/java/prototype/Person.java @@ -0,0 +1,165 @@ +package prototype; + +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.text.Normalizer; +import java.util.List; +import java.util.Set; + +import prototype.utils.Capitalize; +import prototype.utils.DotAbbreviations; + +import com.google.common.base.Joiner; +import com.google.common.base.Splitter; +import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; +import com.google.common.hash.Hashing; + +//import eu.dnetlib.pace.clustering.NGramUtils; +//import eu.dnetlib.pace.util.Capitalise; +//import eu.dnetlib.pace.util.DotAbbreviations; + +public class Person { + private List name = Lists.newArrayList(); + private List surname = Lists.newArrayList(); + private List fullname = Lists.newArrayList(); + + private static Set particles = null; + + public Person(String s) { + s = Normalizer.normalize(s, Normalizer.Form.NFD); // was NFD + s = s.replaceAll("\\(.+\\)", ""); + s = s.replaceAll("\\[.+\\]", ""); + s = s.replaceAll("\\{.+\\}", ""); + s = s.replaceAll("\\s+-\\s+", "-"); + + +// s = s.replaceAll("[\\W&&[^,-]]", " "); + +// System.out.println("class Person: s: " + s); + +// s = 
s.replaceAll("[\\p{InCombiningDiacriticalMarks}&&[^,-]]", " "); + s = s.replaceAll("[\\p{Punct}&&[^-,]]", " "); + s = s.replaceAll("\\d", " "); + s = s.replaceAll("\\n", " "); + s = s.replaceAll("\\.", " "); + s = s.replaceAll("\\s+", " "); + + if (s.contains(",")) { + // System.out.println("class Person: s: " + s); + + String[] arr = s.split(","); + if (arr.length == 1) { + fullname = splitTerms(arr[0]); + } else if (arr.length > 1) { + surname = splitTerms(arr[0]); + name = splitTermsFirstName(arr[1]); +// System.out.println("class Person: surname: " + surname); +// System.out.println("class Person: name: " + name); + + fullname.addAll(surname); + fullname.addAll(name); + } + } else { + fullname = splitTerms(s); + + int lastInitialPosition = fullname.size(); + boolean hasSurnameInUpperCase = false; + + for (int i = 0; i < fullname.size(); i++) { + String term = fullname.get(i); + if (term.length() == 1) { + lastInitialPosition = i; + } else if (term.equals(term.toUpperCase())) { + hasSurnameInUpperCase = true; + } + } + if (lastInitialPosition < fullname.size() - 1) { // Case: Michele G. 
Artini + name = fullname.subList(0, lastInitialPosition + 1); + System.out.println("name: " + name); + surname = fullname.subList(lastInitialPosition + 1, fullname.size()); + } else if (hasSurnameInUpperCase) { // Case: Michele ARTINI + for (String term : fullname) { + if (term.length() > 1 && term.equals(term.toUpperCase())) { + surname.add(term); + } else { + name.add(term); + } + } + } else if (lastInitialPosition == fullname.size()){ + surname = fullname.subList(lastInitialPosition - 1, fullname.size()); + name = fullname.subList(0, lastInitialPosition - 1); + } + + } + } + + private List splitTermsFirstName(String s){ + List list = Lists.newArrayList(); + for (String part : Splitter.on(" ").omitEmptyStrings().split(s)) { + if (s.trim().matches("\\p{Lu}{2,3}")){ + String[] parts = s.trim().split("(?=\\p{Lu})"); // (Unicode UpperCase) + for (String p: parts){ + if (p.length() > 0) + list.add(p); + } + }else{ + list.add(part); + } + + } + return list; + } + + private List splitTerms(String s) { + if (particles == null) { +// particles = NGramUtils.loadFromClasspath("/eu/dnetlib/pace/config/name_particles.txt"); + } + + List list = Lists.newArrayList(); + for (String part : Splitter.on(" ").omitEmptyStrings().split(s)) { + // if (!particles.contains(part.toLowerCase())) { + list.add(part); + + // } + } + return list; + } + + public List getName() { + return name; + } + + public List getSurname() { + return surname; + } + + public List getFullname() { + return fullname; + } + + public String hash() { + return Hashing.murmur3_128().hashString(getNormalisedFullname(),StandardCharsets.UTF_8).toString(); + } + + public String getNormalisedFullname() { + return isAccurate() ? + Joiner.on(" ").join(getSurname()) + ", " + Joiner.on(" ").join(getNameWithAbbreviations()) : + Joiner.on(" ").join(fullname); +// return isAccurate() ? 
+// Joiner.on(" ").join(getCapitalSurname()) + ", " + Joiner.on(" ").join(getNameWithAbbreviations()) : +// Joiner.on(" ").join(fullname); + } + + public List getCapitalSurname() { + return Lists.newArrayList(Iterables.transform(surname, new Capitalize() )); + } + + public List getNameWithAbbreviations() { + return Lists.newArrayList(Iterables.transform(name, new DotAbbreviations() )); + } + + public boolean isAccurate() { + return (name != null && surname != null && !name.isEmpty() && !surname.isEmpty()); + } +} diff --git a/dnet-data-services/src/main/java/prototype/PersonOrig.java b/dnet-data-services/src/main/java/prototype/PersonOrig.java new file mode 100644 index 0000000..4d14e96 --- /dev/null +++ b/dnet-data-services/src/main/java/prototype/PersonOrig.java @@ -0,0 +1,129 @@ +package prototype; + +import java.nio.charset.StandardCharsets; +import java.text.Normalizer; +import java.util.List; +import java.util.Set; + +import prototype.utils.Capitalize; +import prototype.utils.DotAbbreviations; + +import com.google.common.base.Joiner; +import com.google.common.base.Splitter; +import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; +import com.google.common.hash.Hashing; + +//import eu.dnetlib.pace.clustering.NGramUtils; +//import eu.dnetlib.pace.util.Capitalise; +//import eu.dnetlib.pace.util.DotAbbreviations; + +public class PersonOrig { + private List name = Lists.newArrayList(); + private List surname = Lists.newArrayList(); + private List fullname = Lists.newArrayList(); + + private static Set particles = null; + + public PersonOrig(String s) { + s = Normalizer.normalize(s, Normalizer.Form.NFD); + s = s.replaceAll("\\(.+\\)", ""); + s = s.replaceAll("\\[.+\\]", ""); + s = s.replaceAll("\\{.+\\}", ""); + s = s.replaceAll("\\s+-\\s+", "-"); + s = s.replaceAll("[\\W&&[^,-]]", " "); + s = s.replaceAll("[\\p{InCombiningDiacriticalMarks}&&[^,-]]", " "); + s = s.replaceAll("[\\p{Punct}&&[^-,]]", " "); + s = s.replaceAll("\\d", " "); + 
s = s.replaceAll("\\n", " "); + s = s.replaceAll("\\.", " "); + s = s.replaceAll("\\s+", " "); + + if (s.contains(",")) { + String[] arr = s.split(","); + if (arr.length == 1) { + fullname = splitTerms(arr[0]); + } else if (arr.length > 1) { + surname = splitTerms(arr[0]); + name = splitTerms(arr[1]); + fullname.addAll(surname); + fullname.addAll(name); + } + } else { + fullname = splitTerms(s); + + int lastInitialPosition = fullname.size(); + boolean hasSurnameInUpperCase = false; + + for (int i = 0; i < fullname.size(); i++) { + String term = fullname.get(i); + if (term.length() == 1) { + lastInitialPosition = i; + } else if (term.equals(term.toUpperCase())) { + hasSurnameInUpperCase = true; + } + } + + if (lastInitialPosition < fullname.size() - 1) { // Case: Michele G. Artini + name = fullname.subList(0, lastInitialPosition + 1); + surname = fullname.subList(lastInitialPosition + 1, fullname.size()); + } else if (hasSurnameInUpperCase) { // Case: Michele ARTINI + for (String term : fullname) { + if (term.length() > 1 && term.equals(term.toUpperCase())) { + surname.add(term); + } else { + name.add(term); + } + } + } + } + } + + private List splitTerms(String s) { +// if (particles == null) { +// particles = NGramUtils.loadFromClasspath("/eu/dnetlib/pace/config/name_particles.txt"); +// } + + List list = Lists.newArrayList(); + for (String part : Splitter.on(" ").omitEmptyStrings().split(s)) { +// if (!particles.contains(part.toLowerCase())) { + list.add(part); +// } + } + return list; + } + + public List getName() { + return name; + } + + public List getSurname() { + return surname; + } + + public List getFullname() { + return fullname; + } + + public String hash() { + return Hashing.murmur3_128().hashString(getNormalisedFullname(), StandardCharsets.UTF_8).toString(); + } + + public String getNormalisedFullname() { + return isAccurate() ? 
+ Joiner.on(" ").join(getCapitalSurname()) + ", " + Joiner.on(" ").join(getNameWithAbbreviations()) : + Joiner.on(" ").join(fullname); + } + + public List getCapitalSurname() { + return Lists.newArrayList(Iterables.transform(surname, new Capitalize())); + } + + public List getNameWithAbbreviations() { + return Lists.newArrayList(Iterables.transform(name, new DotAbbreviations())); + } + + public boolean isAccurate() { + return (name != null && surname != null && !name.isEmpty() && !surname.isEmpty()); + } +} \ No newline at end of file diff --git a/dnet-data-services/src/main/java/prototype/utils/Capitalize.java b/dnet-data-services/src/main/java/prototype/utils/Capitalize.java new file mode 100644 index 0000000..d00e193 --- /dev/null +++ b/dnet-data-services/src/main/java/prototype/utils/Capitalize.java @@ -0,0 +1,13 @@ +package prototype.utils; + +import org.apache.commons.lang3.text.WordUtils; + +import com.google.common.base.Function; + +public class Capitalize implements Function{ + + @Override + public String apply(String s){ + return WordUtils.capitalize(s.toLowerCase()); + } +} diff --git a/dnet-data-services/src/main/java/prototype/utils/DotAbbreviations.java b/dnet-data-services/src/main/java/prototype/utils/DotAbbreviations.java new file mode 100644 index 0000000..a9f2b8b --- /dev/null +++ b/dnet-data-services/src/main/java/prototype/utils/DotAbbreviations.java @@ -0,0 +1,11 @@ +package prototype.utils; + +import com.google.common.base.Function; + +public class DotAbbreviations implements Function{ + + @Override + public String apply(String s){ + return s.length() == 1 ? s + "." 
: s; + } +} diff --git a/dnet-data-services/src/main/resources/META-INF/services/javax.xml.transform.TransformerFactory b/dnet-data-services/src/main/resources/META-INF/services/javax.xml.transform.TransformerFactory new file mode 100644 index 0000000..1da653d --- /dev/null +++ b/dnet-data-services/src/main/resources/META-INF/services/javax.xml.transform.TransformerFactory @@ -0,0 +1 @@ +com.sun.org.apache.xalan.internal.xsltc.trax.TransformerFactoryImpl \ No newline at end of file diff --git a/dnet-data-services/src/main/resources/eu/dnetlib/applicationContext-dnet-data-transformation-service.xml b/dnet-data-services/src/main/resources/eu/dnetlib/applicationContext-dnet-data-transformation-service.xml new file mode 100644 index 0000000..3db65e8 --- /dev/null +++ b/dnet-data-services/src/main/resources/eu/dnetlib/applicationContext-dnet-data-transformation-service.xml @@ -0,0 +1,34 @@ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/applicationContext-transformation-common.xml b/dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/applicationContext-transformation-common.xml new file mode 100644 index 0000000..2111ba5 --- /dev/null +++ b/dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/applicationContext-transformation-common.xml @@ -0,0 +1,49 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/applicationContext-transformation.properties b/dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/applicationContext-transformation.properties new file mode 100644 index 0000000..f10a970 --- /dev/null +++ b/dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/applicationContext-transformation.properties @@ -0,0 +1,8 @@ +# additional service 
dependencies +services.transformation.defaulttemplate = /eu/dnetlib/data/collective/transformation/engine/oaftemplate.xsl +services.transformation.defaultschema = /eu/dnetlib/data/collective/transformation/schema/OAFSchema_vTransformator.xsd +# +services.transformation.blacklist_api = http://localhost:8080/validator-service/worfklows?request=GetBlacklistedRecords&datasourceId= +# expected json format: {"map":{"name_of_the_vocabulary_as_used_in_the_transformationRuleScript":{"name":"name of the vocabulary as used in the vocabulary profile", "caseSensitive":"true or false", "delimiter":"optional parameter"}}} +# services.transformation.vocabularyproperties.json = {"map":{"Languages":{"name":"Names of Languages", "caseSensitive":"false", "delimiter":"/"}, "TextTypologies":{"name":"Names of Text Object Typologies", "caseSensitive":"false"}}} +services.transformation.vocabularyproperties.json = {"map":{"AccessRights":{"name":"dnet:access_modes", "caseSensitive":"false"}, "Languages":{"name":"dnet:languages", "caseSensitive":"false", "delimiter":"/"}, "TextTypologies":{"name":"dnet:publication_resource", "caseSensitive":"false"}, "SuperTypes":{"name":"dnet:result_typologies", "caseSensitive":"false"}}} diff --git a/dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/engine/identity.xsl b/dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/engine/identity.xsl new file mode 100644 index 0000000..e957251 --- /dev/null +++ b/dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/engine/identity.xsl @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/engine/oaftemplate.xsl b/dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/engine/oaftemplate.xsl new file mode 100644 index 0000000..ffa78f4 --- /dev/null +++ 
b/dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/engine/oaftemplate.xsl @@ -0,0 +1,15 @@ + + + + + + + + + + diff --git a/dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/engine/syntaxcheckfailed.xsl b/dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/engine/syntaxcheckfailed.xsl new file mode 100644 index 0000000..b68cc3d --- /dev/null +++ b/dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/engine/syntaxcheckfailed.xsl @@ -0,0 +1,14 @@ + + + + + + + + + failed + + + + + \ No newline at end of file diff --git a/dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/engine/template.xsl b/dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/engine/template.xsl new file mode 100644 index 0000000..f598360 --- /dev/null +++ b/dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/engine/template.xsl @@ -0,0 +1,18 @@ + + + + + + + + + + + + + diff --git a/dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/schema/DMFSchema.xsd b/dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/schema/DMFSchema.xsd new file mode 100644 index 0000000..8e4a68e --- /dev/null +++ b/dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/schema/DMFSchema.xsd @@ -0,0 +1,60 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/schema/DMFSchema_vTransformator.xsd b/dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/schema/DMFSchema_vTransformator.xsd new file mode 100644 index 0000000..6d64347 --- /dev/null +++ b/dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/schema/DMFSchema_vTransformator.xsd @@ -0,0 
+1,72 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/schema/DMF_OAI.xsd b/dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/schema/DMF_OAI.xsd new file mode 100644 index 0000000..db0ad03 --- /dev/null +++ b/dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/schema/DMF_OAI.xsd @@ -0,0 +1,39 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/schema/DRIVER_DC.xsd b/dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/schema/DRIVER_DC.xsd new file mode 100644 index 0000000..40d29f7 --- /dev/null +++ b/dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/schema/DRIVER_DC.xsd @@ -0,0 +1,154 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/schema/DRIVER_DR.xsd b/dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/schema/DRIVER_DR.xsd new file mode 100644 index 0000000..8a3b702 --- /dev/null +++ b/dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/schema/DRIVER_DR.xsd @@ -0,0 +1,151 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + diff --git a/dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/schema/DRIVER_DRI.xsd b/dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/schema/DRIVER_DRI.xsd new file mode 100644 index 0000000..74185e1 --- /dev/null +++ b/dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/schema/DRIVER_DRI.xsd @@ -0,0 +1,68 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/schema/DRIVER_OAI-ProvenanceInfoSchema.xsd b/dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/schema/DRIVER_OAI-ProvenanceInfoSchema.xsd new file mode 100644 index 0000000..12f91e8 --- /dev/null +++ b/dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/schema/DRIVER_OAI-ProvenanceInfoSchema.xsd @@ -0,0 +1,44 @@ + + + + + + Schema for the description of the provenance of metadata that is + re-exposed by an OAI repository, i.e. metadata that has previously + been harvested before being exposed by the repository. 
+ See: http://www.openarchives.org/OAI/2.0/guidelines-branding.htm + Validated with http://www.w3.org/2001/03/webdata/xsv on 16May2002 + Simeon Warner - $Date: 2002/05/16 19:48:39 $ + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/schema/OAFSchema_vTransformator.xsd b/dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/schema/OAFSchema_vTransformator.xsd new file mode 100644 index 0000000..35c5c8d --- /dev/null +++ b/dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/schema/OAFSchema_vTransformator.xsd @@ -0,0 +1,90 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/schema/OPENAIRE_OAF.xsd b/dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/schema/OPENAIRE_OAF.xsd new file mode 100644 index 0000000..a8ffe8f --- /dev/null +++ b/dnet-data-services/src/main/resources/eu/dnetlib/data/collective/transformation/schema/OPENAIRE_OAF.xsd @@ -0,0 +1,163 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + deprecated by collectedDatasourceid and hostingDatasourceid + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/dnet-data-services/src/main/resources/eu/dnetlib/enabling/views/inspector/transform.st b/dnet-data-services/src/main/resources/eu/dnetlib/enabling/views/inspector/transform.st new file mode 100644 index 0000000..f4043cf --- /dev/null +++ 
b/dnet-data-services/src/main/resources/eu/dnetlib/enabling/views/inspector/transform.st @@ -0,0 +1,36 @@ +$inspector/master(it={ + + + +

Test transformation:

+ +
+Transformation rules: +

+ +Input Record:
+ +

+ +
+
+ +Output Record:
+ + + +})$ \ No newline at end of file diff --git a/dnet-data-services/src/main/resources/eu/dnetlib/functionality/modular/ui/views/ui/transform.st b/dnet-data-services/src/main/resources/eu/dnetlib/functionality/modular/ui/views/ui/transform.st new file mode 100644 index 0000000..a757cfa --- /dev/null +++ b/dnet-data-services/src/main/resources/eu/dnetlib/functionality/modular/ui/views/ui/transform.st @@ -0,0 +1,44 @@ +$common/master( + +header={ + + +}, +onLoad={}, +body={ +
+
+
+
+ + +
+
+ + +
+ + + + + show transformation rule + +
+ +
+ +

+ +

{{outputRecord}}
+

+ +

+ +

{{error.stacktrace}}
+

+
+
+} +)$ diff --git a/dnet-data-services/src/main/resources/eu/dnetlib/test/schemas/TransformationRuleDSResourceType.xsd b/dnet-data-services/src/main/resources/eu/dnetlib/test/schemas/TransformationRuleDSResourceType.xsd new file mode 100644 index 0000000..a096ab7 --- /dev/null +++ b/dnet-data-services/src/main/resources/eu/dnetlib/test/schemas/TransformationRuleDSResourceType.xsd @@ -0,0 +1,108 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/dnet-data-services/src/main/resources/eu/dnetlib/web/resources/js/transform/transform.js b/dnet-data-services/src/main/resources/eu/dnetlib/web/resources/js/transform/transform.js new file mode 100644 index 0000000..d8112b7 --- /dev/null +++ b/dnet-data-services/src/main/resources/eu/dnetlib/web/resources/js/transform/transform.js @@ -0,0 +1,28 @@ +var module = angular.module('transformUI', []); + +module.controller('transformCtrl', function ($scope, $http) { + $scope.outputRecord = ''; + $scope.error = {}; + + $scope.transform = function(rule, s) { + $scope.outputRecord = ''; + $scope.error = {}; + $http.defaults.headers.post["Content-Type"] = "application/json; charset=UTF-8"; + $http.post('transform/transform.do?rule=' + encodeURIComponent(rule), s).success(function(res) { + if(res) { + $scope.outputRecord = res; + } else { + $scope.error({ + 'message' : 'empty response', + }); + } + }).error(function(err) { + $scope.error = err; + }); + + } +}); + +module.filter('encodeURIComponent', function() { + return window.encodeURIComponent; +}); diff --git a/dnet-data-services/src/main/resources/eu/dnetlib/webContext-dnet-data-transformation-inspector.xml b/dnet-data-services/src/main/resources/eu/dnetlib/webContext-dnet-data-transformation-inspector.xml new file mode 100644 index 0000000..bcc222c --- /dev/null +++ 
b/dnet-data-services/src/main/resources/eu/dnetlib/webContext-dnet-data-transformation-inspector.xml @@ -0,0 +1,31 @@ + + + + + + + + + + + + + + + DS_ADMIN + + + + + \ No newline at end of file diff --git a/dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/VocabularyTypeEditorTest.java b/dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/VocabularyTypeEditorTest.java new file mode 100644 index 0000000..c040279 --- /dev/null +++ b/dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/VocabularyTypeEditorTest.java @@ -0,0 +1,35 @@ +package eu.dnetlib.data.collective.transformation; + +import static org.junit.Assert.*; + +import org.junit.Before; +import org.junit.Test; + +/** + * @author js + * + */ +public class VocabularyTypeEditorTest { + + // class under test + private transient VocabularyTypeEditor editor; + private final String json = "{\"map\":{\"Languages\":{\"name\":\"Names of Languages\", \"caseSensitive\":\"false\", \"delimiter\":\"/\"}, \"TextTypologies\":{\"name\":\"Names of Text Object Typologies\", \"caseSensitive\":\"false\"}}}"; + + /** + * @throws java.lang.Exception + */ + @Before + public void setUp() throws Exception { + editor = new VocabularyTypeEditor(); + } + + @Test + public void testGenerate(){ + editor.setAsText(json); + assertNotNull(editor.getValue()); + assertTrue(editor.getValue() instanceof VocabularyMap); + assertTrue(((VocabularyMap)editor.getValue()).containsKey("Languages")); + assertFalse(((VocabularyMap)editor.getValue()).getMap().get("Languages").isCaseSensitive()); + } + +} diff --git a/dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/engine/PreProcessorTest.java b/dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/engine/PreProcessorTest.java new file mode 100644 index 0000000..cbd683a --- /dev/null +++ b/dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/engine/PreProcessorTest.java 
@@ -0,0 +1,141 @@ +package eu.dnetlib.data.collective.transformation.engine; + +import static org.junit.Assert.*; +import static org.mockito.Mockito.*; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Mock; +import org.mockito.runners.MockitoJUnitRunner; + +import eu.dnetlib.data.collective.transformation.VocabularyMap; +import eu.dnetlib.data.collective.transformation.VocabularyRegistry; +import eu.dnetlib.data.collective.transformation.core.xsl.ext.TransformationFunctionProxy; +import eu.dnetlib.data.collective.transformation.engine.functions.Convert; +import eu.dnetlib.data.collective.transformation.engine.functions.ProcessingException; +import eu.dnetlib.data.collective.transformation.engine.functions.RegularExpression; +import eu.dnetlib.data.collective.transformation.engine.functions.Vocabulary; +import eu.dnetlib.data.collective.transformation.rulelanguage.IRule; +import eu.dnetlib.data.collective.transformation.rulelanguage.util.FunctionCall; + +//@RunWith(MockitoJUnitRunner.class) +public class PreProcessorTest { + + private transient PreProcessor preProc; + private transient TransformationFunctionProxy functionProxy = TransformationFunctionProxy.getInstance(); + private transient Convert convertFunction; + private transient RegularExpression regExprFunction; + + @Mock + private transient FunctionCall functionCall; + @Mock + private transient FunctionCall fc2; + private transient Map paramMap = new HashMap(); + private transient Map paramMap2 = new HashMap(); + + private transient Map nsMap = new HashMap(); + private transient Map staticResults = new HashMap(); + private transient VocabularyMap vocabularyMapWrapper = new VocabularyMap(); + private transient Map vocabulariesMap = new HashMap(); + private transient Map varRulesMap = new HashMap(); + private transient Map jobProperties = new HashMap(); 
+ @Mock + private transient Vocabulary vocabulary; + private final String vocabularyName = "LANG"; + private final String[] fieldValue = {"xyz"}; + private final String encodedValue = "abc"; + private final String uniqueKey = "uniqueKey-123"; + private final String uniqueKey2 = "uniqueKey-456"; + @Mock + private transient VocabularyRegistry vocabularyRegistry; + + + //@Before + public void setUp()throws ProcessingException{ + nsMap.put("dc", "http://purl.org/dc/elements/1.1/"); + paramMap.put(Convert.paramFieldValue, "//dc:language[2]"); + paramMap.put(Convert.paramVocabularyName, "LANG"); + when(functionCall.getExternalFunctionName()).thenReturn("convert"); + when(functionCall.getParameters()).thenReturn(paramMap); + when(functionCall.getUuid()).thenReturn(uniqueKey); + + paramMap2.put(RegularExpression.paramExpr1, "x"); + paramMap2.put(RegularExpression.paramExpr2, "y"); + paramMap2.put(RegularExpression.paramRegularExpr, "z"); + when(fc2.getExternalFunctionName()).thenReturn("regExpr"); + when(fc2.getParameters()).thenReturn(paramMap2); + when(fc2.getUuid()).thenReturn(uniqueKey2); + when(fc2.isStatic()).thenReturn(true); + + vocabulariesMap.put(vocabularyName, vocabulary); + vocabularyMapWrapper.setMap(vocabulariesMap); + when(vocabularyRegistry.getVocabularies()).thenReturn(vocabularyMapWrapper); + when(vocabulary.getName()).thenReturn("someQuery"); + when(vocabularyRegistry.getVocabulary(vocabularyName)).thenReturn(vocabulary); + convertFunction = new Convert(); + convertFunction.setVocabularyRegistry(vocabularyRegistry); + //addVocabulary(vocabularyName, vocabulary); + regExprFunction = new RegularExpression(); + + preProc = new PreProcessor(); + preProc.setFunctionProxy(functionProxy); + preProc.setRegExprFunction(regExprFunction); + preProc.setConvertFunction(convertFunction); + when(vocabulary.encoding(Arrays.asList(fieldValue))).thenReturn(encodedValue); + } + + // deprecated +// @Test +// public void testConvert(){ +// List objRecords = getObjRecords(); 
+// assertFalse(objRecords.isEmpty()); +// preProc.preprocess(functionCall, objRecords, nsMap, staticResults, jobProperties, varRulesMap); +// assertEquals(encodedValue, preProc.getFunctionProxy().convert(uniqueKey, 0)); +// } + +// @Test +// public void testRegEpr(){ +// List objRecords = getObjRecords(); +// assertFalse(objRecords.isEmpty()); +// preProc.preprocess(fc2, objRecords, nsMap, staticResults); +// assertNotNull(preProc.getFunctionProxy().regExpr(uniqueKey2, 0)); +// } + + private List getObjRecords(){ + List objRecordList = new java.util.LinkedList(); + StringBuilder builder = new StringBuilder(); + builder.append(""); + builder.append(getMdRecord()); + builder.append(""); + objRecordList.add(builder.toString()); + return objRecordList; + } + + private String getMdRecord(){ + StringBuilder builder = new StringBuilder(); + builder.append(""); + builder.append("
"); + builder.append("234"); + builder.append("abc"); + builder.append("2009-09-30T13:08:57Z"); + builder.append(""); + builder.append(""); + builder.append("71f5069a-9ea2-41fa-968a-4f69a5722ad0_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU="); + builder.append("
"); + builder.append(""); + builder.append("SomeTitle"); + builder.append("firstLang"); + builder.append("" + fieldValue[0] + ""); + builder.append("lastLang"); + builder.append(""); + builder.append(""); + builder.append("
"); + return builder.toString(); + } +} diff --git a/dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/engine/SimpleTransformationEngineTest.java b/dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/engine/SimpleTransformationEngineTest.java new file mode 100644 index 0000000..d758ed8 --- /dev/null +++ b/dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/engine/SimpleTransformationEngineTest.java @@ -0,0 +1,937 @@ +package eu.dnetlib.data.collective.transformation.engine; + +import static org.junit.Assert.*; +import static org.mockito.Mockito.*; + +import java.io.StringReader; +import java.util.Arrays; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; + +import javax.xml.transform.TransformerConfigurationException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.dom4j.Document; +import org.dom4j.DocumentException; +import org.dom4j.io.SAXReader; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Mock; +import org.mockito.junit.MockitoJUnitRunner; +import org.springframework.core.io.ClassPathResource; +import org.springframework.core.io.Resource; + +import eu.dnetlib.common.profile.ResourceDao; +import eu.dnetlib.data.collective.transformation.VocabularyMap; +import eu.dnetlib.data.collective.transformation.VocabularyRegistry; +import eu.dnetlib.data.collective.transformation.engine.core.TransformationImpl; +import eu.dnetlib.data.collective.transformation.engine.functions.DateVocabulary; +import eu.dnetlib.data.collective.transformation.engine.functions.PersonVocabulary; +import eu.dnetlib.data.collective.transformation.engine.functions.ProcessingException; +import eu.dnetlib.data.collective.transformation.engine.functions.Vocabulary; +import eu.dnetlib.data.collective.transformation.rulelanguage.RuleLanguageParser; + 
+@RunWith(MockitoJUnitRunner.class) +public class SimpleTransformationEngineTest { + + private static final Log log = LogFactory.getLog(SimpleTransformationEngineTest.class); + + private static final String xslTemplatePath = "eu/dnetlib/data/collective/transformation/engine/oaftemplate.xsl"; + private transient Resource xslTemplateResource = new ClassPathResource(xslTemplatePath); + private static final String schemaPath = "eu/dnetlib/data/collective/transformation/schema/DMFSchema_vTransformator.xsd"; + private transient Resource schemaResource = new ClassPathResource(schemaPath); + + private static final String xslTemplatePath_oaf = "eu/dnetlib/data/collective/transformation/engine/oaftemplate.xsl"; // OpenAIRE specific + private transient Resource xslTemplateResource_oaf = new ClassPathResource(xslTemplatePath_oaf); // OpenAIRE specific + private static final String schemaPath_oaf = "eu/dnetlib/data/collective/transformation/schema/OAFSchema_vTransformator.xsd"; // OpenAIRE specific + private transient Resource schemaResource_oaf = new ClassPathResource(schemaPath_oaf); // OpenAIRE specific + + // class under test + private transient SimpleTransformationEngine transformationEngine; + + private transient TransformationImpl transformation; + + private transient TransformationImpl transformationOAF; // OpenAIRE specific + + private transient TransformationImpl transformationProvenance; // OpenAIRE specific + provenance + + private transient TransformationImpl transformationAnyFunderProject; // OpenAIREplus specific + + private transient TransformationImpl transformationWoS; + + @Mock + private transient eu.dnetlib.common.profile.Resource resource; + @Mock + private transient ResourceDao resourceDao; + @Mock + private transient VocabularyRegistry vocabularyRegistry; + private transient VocabularyMap vocabularyMapWrapper = new VocabularyMap(); + private transient Map vocabularies = new HashMap(); + @Mock + private transient Vocabulary vocabularyLang; + + private 
transient DateVocabulary vocabularyDate = new DateVocabulary(); + + private transient PersonVocabulary vocabularyPerson = new PersonVocabulary(); + + private transient Vocabulary vocabularyTypes; + + private transient Vocabulary vocabularyRights; + private transient String[] rights = {"info:eu-repo/semantics/openAccess", + "info:eu-repo/semantics/closedAccess", + "info:eu-repo/semantics/embargoedAccess", + "info:eu-repo/semantics/restrictedAccess"}; + + private transient String repositoryId = "profile-123"; + + private transient String dataSinkId = "dnet://MDStoreDS/4-9c7cf682-849b-48bd-92cf-e65367f38e14_TURTdG9yZURTUmVzb3VyY2VzL01EU3RvcmVEU1Jlc291cmNlVHlwZQ==?type=REFRESH"; + + + @SuppressWarnings("unchecked") + @Before + public void setUp() throws TransformerConfigurationException, ProcessingException{ + System.setProperty("javax.xml.transform.TransformerFactory", "net.sf.saxon.TransformerFactoryImpl"); + List rightsOA = new LinkedList(); + rightsOA.add(rights[0]); + + vocabularyTypes = new Vocabulary(); + @SuppressWarnings("rawtypes") + Map encodingMap = new HashMap(); + encodingMap.put("name", "Article"); + encodingMap.put("encoding", "0001"); + encodingMap.put("code", "0001"); + encodingMap.put("synonyms", Arrays.asList(new String[]{"info:eu-repo/semantics/article"})); + List> termList = new LinkedList>(); + termList.add(encodingMap); + encodingMap = new HashMap(); + encodingMap.put("name", "Unknown"); + encodingMap.put("encoding", "0000"); + encodingMap.put("code", "0000"); + encodingMap.put("synonyms", Arrays.asList(new String[]{})); + termList.add(encodingMap); + + vocabularyTypes.setResource(termList); + + vocabularyRights = new Vocabulary(); + encodingMap = new HashMap(); + encodingMap.put("name", ""); + encodingMap.put("encoding", "OPEN"); + encodingMap.put("code", "OPEN"); + encodingMap.put("synonyms", Arrays.asList(new String[]{"info:eu-repo/semantics/openAccess"})); + termList = new LinkedList>(); + termList.add(encodingMap); + 
vocabularyRights.setResource(termList); + when(vocabularyRegistry.getVocabularies()).thenReturn(vocabularyMapWrapper); + when(vocabularyRegistry.getVocabulary("LangVocab")).thenReturn(vocabularyLang); + when(vocabularyRegistry.getVocabulary("RightsVocab")).thenReturn(vocabularyRights); + //when(vocabularyRegistry.getVocabulary("DateISO8601")).thenReturn(vocabularyDate); + //when(vocabularyRegistry.getVocabulary("Person")).thenReturn(vocabularyPerson); + //when(vocabularyRegistry.getVocabulary("TypesVocab")).thenReturn(vocabularyTypes); + when(vocabularyLang.encoding(anyList())).thenReturn("Unknown Language"); + //when(vocabularyLang.getName()).thenReturn("someQuery"); + vocabularies.put("LangVocab", vocabularyLang); + vocabularies.put("RightsVocab", vocabularyRights); // OpenAIRE specific + vocabularies.put("DateISO8601", vocabularyDate); + vocabularies.put("Person", vocabularyPerson); + vocabularies.put("TypesVocab", vocabularyTypes); + + vocabularyMapWrapper.setMap(vocabularies); + transformationEngine = new SimpleTransformationEngine(); + transformationEngine.setVocabularyRegistry(vocabularyRegistry); + transformationEngine.setResourceDao(resourceDao); + transformation = new TransformationImpl(); + transformation.addJobConstant(TransformationImpl.JOBCONST_DATASINKID, dataSinkId); + transformation.setTemplate(xslTemplateResource); + transformation.setSchema(schemaResource); + transformation.init(); + transformation.setRuleLanguageParser(getRuleLanguageParser(getTransformationScript())); + //transformation.setRootElement("record"); + transformation.configureTransformation(); + // OpenAIRE specific + transformationOAF = new TransformationImpl(); + transformationOAF.addJobConstant(TransformationImpl.JOBCONST_DATASINKID, dataSinkId); + transformationOAF.setTemplate(xslTemplateResource_oaf); + transformationOAF.setSchema(schemaResource_oaf); + transformationOAF.init(); + transformationOAF.setRuleLanguageParser(getRuleLanguageParser(getOAFTransformationScript())); + 
System.out.println("OAF CONFIGURE TRANSFORMATIOn"); + transformationOAF.configureTransformation(); + + transformationProvenance = new TransformationImpl(); + transformationProvenance.addJobConstant(TransformationImpl.JOBCONST_DATASINKID, dataSinkId); + transformationProvenance.setTemplate(xslTemplateResource_oaf); + transformationProvenance.setSchema(schemaResource_oaf); + transformationProvenance.init(); + transformationProvenance.setRuleLanguageParser(getRuleLanguageParser(getProvenanceTransformationScript() )); + transformationProvenance.configureTransformation(); + + transformationAnyFunderProject = new TransformationImpl(); + transformationAnyFunderProject.addJobConstant(TransformationImpl.JOBCONST_DATASINKID, dataSinkId); + transformationAnyFunderProject.setTemplate(xslTemplateResource_oaf); + transformationAnyFunderProject.setSchema(schemaResource_oaf); + transformationAnyFunderProject.init(); +// transformationAnyFunderProject.setRuleLanguageParser(getRuleLanguageParser(getFunderTransformationScript() )); + transformationAnyFunderProject.setRuleLanguageParser(getRuleLanguageParser(getOpenaireplusCompatibleFunderTransformationScript() )); + transformationAnyFunderProject.configureTransformation(); + + transformationWoS = new TransformationImpl(); + transformationWoS.addJobConstant(TransformationImpl.JOBCONST_DATASINKID, dataSinkId); + transformationWoS.setTemplate(xslTemplateResource_oaf); + transformationWoS.setSchema(schemaResource_oaf); + transformationWoS.init(); +// transformationAnyFunderProject.setRuleLanguageParser(getRuleLanguageParser(getFunderTransformationScript() )); + transformationWoS.setRuleLanguageParser(getRuleLanguageParser(getWosTransformationScript() )); + transformationWoS.configureTransformation(); + + + String xpathExprOnProfile = "//CONFIGURATION/OFFICIAL_NAME"; + String valueOnProfile = "repositoryOfficialName"; + try{ + 
when(resourceDao.getResourceByQuery("collection('/db/DRIVER/RepositoryServiceResources')//RESOURCE_PROFILE[.//EXTRA_FIELDS/FIELD[key=\"NamespacePrefix\"][value=\"\"]]")).thenReturn(resource); + when(resourceDao.getResource(repositoryId)).thenReturn(resource); + //when(resourceDao.getResourceByQuery("concat('collection()', '')")).thenReturn(resource); + when(resourceDao.getResourceByQuery("collection()")).thenReturn(resource); + }catch(Exception e){ + e.printStackTrace(); + } + when(resource.getValue(xpathExprOnProfile)).thenReturn(valueOnProfile); + String xpathExprDataSourceId = "//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"; // OpenAIRE specific + String valueDataSourceId = "opendoar::166"; // OpenAIRE specific + when(resource.getValue(xpathExprDataSourceId)).thenReturn(valueDataSourceId); // OpenAIRE specific + String xpathExprDataSourceType = "//EXTRA_FIELDS/FIELD[key='DataSourceType']/value"; // prototype + String valueDataSourceType = "Aggregator"; // prototype + when(resource.getValue(xpathExprDataSourceType)).thenReturn(valueDataSourceType); // prototype + + } + + @Test + public void testTransformationWithObjectRecords() throws DocumentException{ + transformationEngine.setTransformation(transformation); + List mdRecords = new LinkedList(); + mdRecords.add(getMdRecord("obj-132", "md-1", getDC())); +// mdRecords.add(getMdRecord("obj-132", "md-2", getDidl())); +// List objRecords = new LinkedList(); +// objRecords.add(getObjectRecord(mdRecords)); + @SuppressWarnings("unused") + String dump; + assertNotNull(dump = transformation.dumpStylesheet()); + List transformedMdRecordsResult = new LinkedList(); + for (String srcRecord: mdRecords){ + transformedMdRecordsResult.add(transformationEngine.transform(srcRecord)); + } +// assertEquals(objRecords.size(), transformedMdRecordsResult.size()); + //System.out.println(dump); + Document record = (new SAXReader()).read(new StringReader(transformedMdRecordsResult.get(0))); + assertEquals("", 
record.valueOf("//*[local-name()='header']/@status")); + assertNotNull("record contains no header node", record.selectSingleNode("//*[local-name()='header']")); + assertNotNull("record contains no metadata node", record.selectSingleNode("//*[local-name()='metadata']")); + assertEquals("http://somehost", record.valueOf("//dc:identifier")); + } + + @Test + public void testTransformationWithMdRecords() throws DocumentException{ + transformationEngine.setTransformation(transformation); +// List mdRecords = new LinkedList(); +// mdRecords.add(getMdRecord("obj-132", "md-1==::oai:bla-1", getDC())); + @SuppressWarnings("unused") + String dump; + assertNotNull(dump = transformation.dumpStylesheet()); + System.out.println("DUMP: " + dump); + + String transformedRecordResult = transformationEngine.transform(getMdRecord("obj-132", "md-1==::oai:bla-1", getDC())); +// assertEquals(mdRecords.size(), transformedMdRecordsResult.size()); + Document record = (new SAXReader()).read(new StringReader(transformedRecordResult)); + System.out.println(record.asXML()); +// assertEquals("", record.valueOf("//*[local-name()='header']/@status")); +// assertNotNull("record contains no header node", record.selectSingleNode("//*[local-name()='header']")); +// assertNotNull("record contains no metadata node", record.selectSingleNode("//*[local-name()='metadata']")); +// assertEquals("http://somehost", record.valueOf("//dc:identifier")); + } + + @Test + public void testTransformationWithMdRecords_oaf_failed() throws DocumentException{ + transformationEngine.setTransformation(transformationOAF); +// List mdRecords = new LinkedList(); +// mdRecords.add(getMdRecord("obj-132", "md-1", getOAFDC())); + String dump; + assertNotNull(dump = transformationOAF.dumpStylesheet()); + //System.out.println(dump); + String transformedRecordResult = transformationEngine.transform(getMdRecord("obj-132", "md-1", getOAFDC())); +// assertEquals(mdRecords.size(), transformedMdRecordsResult.size()); + 
System.out.println("*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*\r\n" + dump); + Document record = (new SAXReader()).read(new StringReader(transformedRecordResult)); + System.out.println(record.asXML()); + assertEquals("", record.valueOf("//*[local-name()='header']/@status")); + assertNotNull("record contains no header node", record.selectSingleNode("//*[local-name()='header']")); + assertNotNull("record contains no metadata node", record.selectSingleNode("//*[local-name()='metadata']")); + assertEquals("failed", record.valueOf("//*[local-name()='header']/@syntaxcheck")); + //assertEquals("http://somehost", record.valueOf("//*[local-name()='metadata']//*[local-name()='identifier']")); + //assertEquals("OPEN", record.valueOf("//oaf:accessrights")); // test convert function with rights vocabulary for many dc:rights elements + //assertEquals("0001", record.valueOf("//dr:CobjCategory")); + } + + @Test + public void testTransformationOfProjectInformation_oaf() throws DocumentException{ + transformationEngine.setTransformation(transformationAnyFunderProject); +// List mdRecords = new LinkedList(); +// mdRecords.add(getMdRecordWithProvenance("obj-132", "md-1", getOAFDC(), getProvenance())); + String dump; + System.err.println("BEFORE DUMP\r\n"); + assertNotNull(dump = transformationAnyFunderProject.dumpStylesheet()); + String transformedRecordResult = transformationEngine.transform(getMdRecordWithProvenance("obj-132", "md-1", getOAFDC(), getProvenance())); +// assertEquals(mdRecords.size(), transformedMdRecordsResult.size()); + System.err.println("DUMP\r\n" + dump); + Document record = (new SAXReader()).read(new StringReader(transformedRecordResult)); + System.out.println(record.asXML()); + assertEquals("", record.valueOf("oaf:projectid")); + } + + @Test + public void testTransformationOfWos_oaf() throws DocumentException{ + transformationEngine.setTransformation(transformationWoS); +// List mdRecords = new LinkedList(); +// mdRecords.add(getMdRecordWithProvenance("obj-132", 
"md-1", getOAFDC(), getProvenance())); + String dump; + assertNotNull(dump = transformationWoS.dumpStylesheet()); + String transformedRecordResult = transformationEngine.transform(getMdRecordWithProvenance("obj-132", "md-1", getWOS(), getProvenance())); +// assertEquals(mdRecords.size(), transformedMdRecordsResult.size()); + System.out.println("___WOS___"); + System.out.println(dump); + System.out.println("___WOS___"); + Document record = (new SAXReader()).read(new StringReader(transformedRecordResult)); + System.out.println(record.asXML()); + assertEquals("", record.valueOf("oaf:projectid")); + } + + + @Test + public void testTransformationWithMdRecords_provenance() throws DocumentException{ + transformationEngine.setTransformation(transformationProvenance); +// List mdRecords = new LinkedList(); +// mdRecords.add(getMdRecordWithProvenance("obj-132", "md-1", getOAFDC(), getProvenance() )); + String dump; + assertNotNull(dump = transformationProvenance.dumpStylesheet()); + String transformedRecordResult = transformationEngine.transform(getMdRecordWithProvenance("obj-132", "md-1", getOAFDC(), getProvenance() )); +// assertEquals(mdRecords.size(), transformedMdRecordsResult.size()); + System.out.println(dump); + Document record = (new SAXReader()).read(new StringReader(transformedRecordResult)); + System.out.println("_______________________________________"); + System.out.println(record.asXML()); + System.out.println("_______________________________________"); + assertEquals("", record.valueOf("//*[local-name()='header']/@status")); + assertNotNull("record contains no header node", record.selectSingleNode("//*[local-name()='header']")); + assertNotNull("record contains no metadata node", record.selectSingleNode("//*[local-name()='metadata']")); + assertEquals("http://somehost", record.valueOf("//dc:identifier")); + assertEquals("OPEN", record.valueOf("//oaf:accessrights")); // test convert function with rights vocabulary for many dc:rights elements + + } + +// @Test 
+//    public void testTransformationWithSkippedRecord() throws DocumentException{
+//        transformationEngine.setTransformation(transformation);
+//        List mdRecords = new LinkedList();
+//        mdRecords.add(getMdRecord("obj-132", "md-1==::oai:bla-1", getDC()));
+//        mdRecords.add(getMdRecord("obj-133", "md-1==::oai:bla-2", getDC2()));
+//        List transformedMdRecordsResult = transformationEngine.transform(mdRecords);
+//        assertEquals(mdRecords.size() - 1, transformedMdRecordsResult.size());
+//        //System.out.println(dump);
+//        Document record = (new SAXReader()).read(new StringReader(transformedMdRecordsResult.get(0)));
+//        System.out.println(record.asXML());
+//        assertEquals("", record.valueOf("//*[local-name()='header']/@status"));
+//        assertNotNull("record contains no header node", record.selectSingleNode("//*[local-name()='header']"));
+//        assertNotNull("record contains no metadata node", record.selectSingleNode("//*[local-name()='metadata']"));
+//        assertEquals("http://somehost", record.valueOf("//dc:identifier"));
+//    }
+
+    /**
+     * Transforms the record built by {@code getDeletedMdRecord} and checks that
+     * the output has a header node whose {@code status} attribute is
+     * {@code "deleted"} and that it has no metadata node.
+     *
+     * @throws DocumentException if the transformed record cannot be parsed as XML
+     */
+    @Test
+    public void testTransformationWithDeletedRecord() throws DocumentException{
+        transformationEngine.setTransformation(transformation);
+//        List mdRecords = new LinkedList();
+//        mdRecords.add(getDeletedMdRecord("obj-132", "md-1"));
+        // Only the non-null check matters here; the stylesheet text itself is never read.
+        assertNotNull(transformation.dumpStylesheet());
+        String transformedRecordResult = transformationEngine.transform(getDeletedMdRecord("obj-132", "md-1"));
+//        assertEquals(mdRecords.size(), transformedMdRecordsResult.size());
+        Document record = (new SAXReader()).read(new StringReader(transformedRecordResult));
+        //System.out.println(record.asXML());
+        assertEquals("deleted", record.valueOf("//*[local-name()='header']/@status"));
+        assertNotNull("record contains no header node", record.selectSingleNode("//*[local-name()='header']"));
+        assertNull("record contains a metadata node", record.selectSingleNode("//*[local-name()='metadata']"));
+    }
+
+    /**
+     * Applies the OAF transformation to the record built by
+     * {@code getEmptyMetadataMdRecord} and logs the output at debug level.
+     * Apart from the stylesheet dump being non-null, nothing is asserted about
+     * the result; the test passes as long as the transformation does not throw.
+     *
+     * @throws DocumentException declared for symmetry with the other tests; no
+     *         XML parsing happens in this method
+     */
+    @Test
+    public void testTransformationBehaviourWithEmptyMetadata() throws DocumentException{
+        log.debug("applying OAF transformation");
+        transformationEngine.setTransformation(transformationOAF);
+//        List mdRecords = new LinkedList();
+//        mdRecords.add(getEmptyMetadataMdRecord("obj-132", "md-1"));
+        // Only the non-null check matters here; the stylesheet text itself is never read.
+        assertNotNull(transformationOAF.dumpStylesheet());
+        String transformedRecordResult = transformationEngine.transform(getEmptyMetadataMdRecord("obj-132", "md-1"));
+//        assertEquals(mdRecords.size(), transformedMdRecordsResult.size());
+        log.debug("record output: " + transformedRecordResult);
+    }
+
+    /**
+     * Prints the given script to standard output, feeds it to a new
+     * {@link RuleLanguageParser} and returns that parser.
+     *
+     * @param aTransformationScript the rule-language script text to parse
+     * @return the parser after {@code parse} has been called on the script
+     */
+    private RuleLanguageParser getRuleLanguageParser(String aTransformationScript){
+        RuleLanguageParser parser = new RuleLanguageParser();
+        System.out.println(aTransformationScript);
+        StringReader reader = new StringReader(aTransformationScript);
+        parser.parse(reader);
+        return parser;
+    }
+
+    private String getTransformationScript(){
+        StringBuilder scriptBuilder = new StringBuilder();
+        scriptBuilder.append("declare_script \"MainSample\";\r\n");
+        scriptBuilder.append("declare_ns dr = \"http://www.driver-repository.eu/namespace/dr\";\r\n");
+        scriptBuilder.append("declare_ns dri = \"http://www.driver-repository.eu/namespace/dri\";\r\n");
+        scriptBuilder.append("declare_ns dc = \"http://purl.org/dc/elements/1.1/\";\r\n");
+        scriptBuilder.append("declare_ns didl = \"urn:mpeg:mpeg21:2002:02-DIDL-NS\";\r\n");
+        //scriptBuilder.append("$a1 = Convert(xpath:\"//dc:language[2]\",LangVocab);\r\n");
+        //scriptBuilder.append("dri:mdFormat = $a1;\r\n");
+        scriptBuilder.append("dr:CobjMDFormats = xpath:\"//dc:title\";\r\n");
+        scriptBuilder.append("dr:repositoryName = getValue(PROFILEFIELD, [xpath:\"//dri:repositoryId\", xpath:\"//CONFIGURATION/OFFICIAL_NAME\"]);\r\n");
+        scriptBuilder.append("dc:relation = xpath:\"//didl:Resource/@ref\";\r\n");
+        scriptBuilder.append("dc:title = copy(\"dc:title\", \"//dc:title\", 
\"@*|node()\");\r\n"); + //scriptBuilder.append("if xpath:\"//dc:title\" dr:aggregatorName = \"abc\"; else dr:repositoryName = \"def\";\r\n"); + scriptBuilder.append("if xpath:\"//dc:language[4]\" dr:CobjTypology = xpath:\"//dc:language[2]\"; else dr:CobjTypology = \"test\";\r\n"); + scriptBuilder.append("apply xpath:\"//dc:identifier\" if xpath:\"starts-with(normalize-space(.), 'http')\" dc:identifier = xpath:\"normalize-space(.)\"; else dr:CobjIdentifier = xpath:\"normalize-space(.)\";\r\n"); + //scriptBuilder.append("dc:language = Convert(xpath:\"//dc:language[2]\",LangVocab);\r\n"); + scriptBuilder.append("dc:language = Convert(xpath:\"normalize-space(//dc:language[2])\",LangVocab);\r\n"); + scriptBuilder.append("dri:recordIdentifier = RegExpr(xpath:\"//dri:recordIdentifier\", $var1, \"s/^(.*)(::)/$2/\");\r\n"); + scriptBuilder.append("$var0 = \"''\";\r\n"); + scriptBuilder.append("static $var1 = RegExpr($job.datasinkid, $var0, \"s/^(dnet:\\/\\/MDStoreDS\\/)|(\\?.*)//g\");\r\n"); + scriptBuilder.append("if xpath:\"//dc:format[text()='digital']\" dc:publisher = xpath:\"//dc:publisher\"; else dc:publisher = skipRecord();\r\n"); + scriptBuilder.append("end\r\n"); + + return scriptBuilder.toString(); + } + + private String getOAFTransformationScript(){ + StringBuilder scriptBuilder = new StringBuilder(); + scriptBuilder.append("declare_script \"MainSample_OAF\";\r\n"); + scriptBuilder.append("declare_ns oaf = \"http://namespace.openaire.eu/oaf\";\r\n"); + scriptBuilder.append("declare_ns dr = \"http://www.driver-repository.eu/namespace/dr\";\r\n"); + scriptBuilder.append("declare_ns dri = \"http://www.driver-repository.eu/namespace/dri\";\r\n"); + scriptBuilder.append("declare_ns dc = \"http://purl.org/dc/elements/1.1/\";\r\n"); + scriptBuilder.append("oaf:dateAccepted = Convert(xpath:\"descendant-or-self::dc:date\", DateISO8601, \"yyyy-MM-dd\", \"min()\");\r\n"); +// scriptBuilder.append("oaf:dateAccepted = Convert(xpath:\"//dc:date\", DateISO8601);\r\n"); + 
//scriptBuilder.append("dc:creator = Convert(xpath:\"descendant-or-self::dc:date\", DateISO8601, \"yyyy-MM-dd\", \"min()\");\r\n"); + scriptBuilder.append("apply xpath:\"//dc:date\" if xpath:\"starts-with(normalize-space(.), 'info:eu-repo/date')\" oaf:embargoenddate = RegExpr(xpath:\"normalize-space(.)\", $var0, \"s/^(.*info:eu-repo\\/date\\/embargoEnd\\/)//g\"); else $var0 = \"''\";\r\n"); // retrieve from dc:date , test the info prefix + scriptBuilder.append("apply xpath:\"//dc:relation\" if xpath:\"string-length(translate(normalize-space(.),'info:eu-repo/grantAgreement/EC/FP7','')) = 5\" oaf:projectid = RegExpr(xpath:\"normalize-space(.)\", $var0, \"s/^(.*info:eu-repo\\/grantAgreement\\/EC\\/FP7\\/)//gm\"); else dc:relation = xpath:\"normalize-space(.)\";\r\n"); // retrieve from dc:relation , test the info prefix +// scriptBuilder.append("apply xpath:\"//dc:relation\" if xpath:\"starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')\" oaf:projectid = RegExpr(xpath:\"normalize-space(.)\", $var0, \"s/^(.*info:eu-repo\\/grantAgreement\\/EC\\/FP7\\/)//gm\"); else dc:relation = xpath:\"normalize-space(.)\";\r\n"); // retrieve from dc:relation , test the info prefix +// scriptBuilder.append("apply xpath:\"//dc:rights\" if xpath:\"starts-with(normalize-space(.), 'info:eu-repo/semantics')\" dc:rights = empty; else dc:rights = xpath:\"normalize-space(.)\";\r\n"); // retrieve from dc:rights, better retrieve from vocabulary + scriptBuilder.append("oaf:accessrights = Convert(xpath:\"//dc:rights\", RightsVocab);\r\n"); + scriptBuilder.append("oaf:datasourceid = getValue(PROFILEFIELD, [xpath:\"concat('collection()', '')\", xpath:\"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value\"]);\r\n"); // retrieve from profile + scriptBuilder.append("dr:CobjMDFormats = xpath:\"//dc:title\";\r\n"); + scriptBuilder.append("dc:rights = skipRecord();\r\n"); + scriptBuilder.append("dc:language = Convert(xpath:\"//dc:language\", LangVocab);\r\n"); + 
scriptBuilder.append("$varFulltext = xpath:\"//dc:relation[starts-with(., 'http')]\";\r\n"); + scriptBuilder.append("if xpath:\"//dc:relation[starts-with(., 'http')]\" oaf:fulltext = $varFulltext; else $var0 = \"''\";\r\n"); + scriptBuilder.append("oaf:person = set(xpath:\"//dc:creator\", @normalized = Convert(xpath:\".\", Person););\r\n"); +// scriptBuilder.append("apply xpath:\"//dc:creator\" if xpath:\"string-length(.) > 0\" oaf:person = set(xpath:\".\", @normalized = \"test\";); else dc:creator = xpath:\"normalize-space(.)\";\r\n"); +// scriptBuilder.append("dc:creator = xpath:\"//dc:creator\";\r\n"); +// scriptBuilder.append("apply xpath:\"//dc:creator\" if xpath:\"string-length(.) > 0\" dc:creator = Convert(xpath:\".\", Person); else $var0 = \"''\";\r\n"); + scriptBuilder.append("$varjournaltitle = \"'some title'\";\r\n"); + scriptBuilder.append("oaf:journal = set($varjournaltitle, @issn=\"1234-5678\"; , @eissn=\"1234-5679\";);\r\n"); + scriptBuilder.append("dr:repositoryName = getValue(PROFILEFIELD, [xpath:\"concat('collection()', '')\", xpath:\"//CONFIGURATION/OFFICIAL_NAME\"]);\r\n"); + //scriptBuilder.append("if xpath:\"//dc:title\" dr:aggregatorName = \"abc\"; else dr:repositoryName = \"def\";\r\n"); + //scriptBuilder.append("if xpath:\"//dc:language[4]\" dr:CobjTypology = xpath:\"//dc:language[2]\"; else dr:CobjTypology = \"test\";\r\n"); + scriptBuilder.append("apply xpath:\"//dc:identifier\" if xpath:\"starts-with(normalize-space(.), 'http')\" dc:identifier = xpath:\"normalize-space(.)\"; else dr:CobjIdentifier = xpath:\"normalize-space(.)\";\r\n"); + //scriptBuilder.append("dc:language = Convert(xpath:\"//dc:language[2]\",LangVocab);\r\n"); + scriptBuilder.append("$var0 = \"''\";\r\n"); // workaround - placeholder for an empty string + scriptBuilder.append("oaf:hostedBy = set(\"''\", @name = \"hostedName\"; , @id = \"hostedId\";);\r\n"); + scriptBuilder.append("$varId = identifierExtract('[\"//dc:identifier\"]' , xpath:\"./record\" , 
'(10[.][0-9]{4,}[^\\s\"/<>]*/[^\\s\"<>]+)');\r\n"); + scriptBuilder.append("oaf:identifier = set(xpath:\"$varId//value\", @identifierType = \"doi\";);\r\n"); + scriptBuilder.append("dr:CobjCategory = Convert(xpath:\"//dc:type\",TypesVocab);\r\n"); + + scriptBuilder.append("end\r\n"); + + return scriptBuilder.toString(); + } + + private String getProvenanceTransformationScript(){ + StringBuilder scriptBuilder = new StringBuilder(); + + scriptBuilder.append("declare_script \"MainSample_Provenance\";\r\n"); + scriptBuilder.append("declare_ns oaf = \"http://namespace.openaire.eu/oaf\";\r\n"); + scriptBuilder.append("declare_ns dr = \"http://www.driver-repository.eu/namespace/dr\";\r\n"); + scriptBuilder.append("declare_ns dri = \"http://www.driver-repository.eu/namespace/dri\";\r\n"); + scriptBuilder.append("declare_ns dc = \"http://purl.org/dc/elements/1.1/\";\r\n"); + scriptBuilder.append("declare_ns prov = \"http://www.openarchives.org/OAI/2.0/provenance\";\r\n"); +// scriptBuilder.append("dc:dateAccepted = Convert(xpath:\"//dc:date\", DateISO8601);\r\n"); + //scriptBuilder.append("dc:creator = Convert(xpath:\"descendant-or-self::dc:date\", DateISO8601, \"yyyy-MM-dd\", \"min()\");\r\n"); + //scriptBuilder.append("dc:creator = xpath:\"//dc:creator\";\r\n"); + scriptBuilder.append("apply xpath:\"//dc:date\" if xpath:\"starts-with(normalize-space(.), 'info:eu-repo/date')\" oaf:embargoenddate = RegExpr(xpath:\"normalize-space(.)\", $var0, \"s/^(.*info:eu-repo\\/date\\/embargoEnd\\/)//g\"); else $var0 = \"''\";\r\n"); // retrieve from dc:date , test the info prefix + scriptBuilder.append("apply xpath:\"//dc:relation\" if xpath:\"starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')\" oaf:projectid = RegExpr(xpath:\"normalize-space(.)\", $var0, \"s/^(.*info:eu-repo\\/grantAgreement\\/EC\\/FP7\\/)//gm\"); else dc:relation = xpath:\"normalize-space(.)\";\r\n"); // retrieve from dc:relation , test the info prefix +// scriptBuilder.append("apply 
xpath:\"//dc:rights\" if xpath:\"starts-with(normalize-space(.), 'info:eu-repo/semantics')\" dc:rights = empty; else dc:rights = xpath:\"normalize-space(.)\";\r\n"); // retrieve from dc:rights, better retrieve from vocabulary + scriptBuilder.append("oaf:accessrights = Convert(xpath:\"//dc:rights\", RightsVocab);\r\n"); + scriptBuilder.append("oaf:datasourceid = getValue(PROFILEFIELD, [xpath:\"concat('collection()', '')\", xpath:\"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value\"]);\r\n"); // retrieve from profile + scriptBuilder.append("static $varDsType = getValue(PROFILEFIELD, [xpath:\"concat('collection()', '')\", xpath:\"//EXTRA_FIELDS/FIELD[key='DataSourceType']/value\"]);\r\n"); + scriptBuilder.append("if xpath:\"$varDsType='Aggregator'\" oaf:hostingDatasourceid = xpath:\"//prov:baseURL\"; else oaf:hostingDatasourceid = getValue(PROFILEFIELD, [xpath:\"concat('collection()', '')\", xpath:\"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value\"]);\r\n"); + scriptBuilder.append("oaf:collectedDatasourceid = getValue(PROFILEFIELD, [xpath:\"concat('collection()', '')\", xpath:\"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value\"]);\r\n"); + scriptBuilder.append("oaf:concept = \"'CONCEPT'\";\r\n"); + scriptBuilder.append("dr:CobjMDFormats = xpath:\"//dc:title\";\r\n"); + scriptBuilder.append("dc:language = Convert(xpath:\"//dc:language\", LangVocab);\r\n"); + scriptBuilder.append("%myTemplate = split(xpath:\"//dc:creator/text()\", \"dc:creator\", \";\");\r\n"); + scriptBuilder.append("dr:repositoryName = getValue(PROFILEFIELD, [xpath:\"concat('collection()', '')\", xpath:\"//CONFIGURATION/OFFICIAL_NAME\"]);\r\n"); + //scriptBuilder.append("if xpath:\"//dc:title\" dr:aggregatorName = \"abc\"; else dr:repositoryName = \"def\";\r\n"); + //scriptBuilder.append("if xpath:\"//dc:language[4]\" dr:CobjTypology = xpath:\"//dc:language[2]\"; else dr:CobjTypology = \"test\";\r\n"); + scriptBuilder.append("apply xpath:\"//dc:identifier\" if 
xpath:\"starts-with(normalize-space(.), 'http')\" dc:identifier = xpath:\"normalize-space(.)\"; else dr:CobjIdentifier = xpath:\"normalize-space(.)\";\r\n"); + //scriptBuilder.append("dc:language = Convert(xpath:\"//dc:language[2]\",LangVocab);\r\n"); + scriptBuilder.append("$var0 = \"''\";\r\n"); // workaround - placeholder for an empty string + scriptBuilder.append("end\r\n"); + + return scriptBuilder.toString(); + } + + private String getWosTransformationScript(){ + StringBuilder scriptBuilder = new StringBuilder(); + + scriptBuilder.append("declare_script \"MainSample\";\r\n"); + scriptBuilder.append("declare_ns oaf = \"http://namespace.openaire.eu/oaf\";\r\n"); + scriptBuilder.append("declare_ns dr = \"http://www.driver-repository.eu/namespace/dr\";\r\n"); + scriptBuilder.append("declare_ns dri = \"http://www.driver-repository.eu/namespace/dri\";\r\n"); + scriptBuilder.append("declare_ns dc = \"http://purl.org/dc/elements/1.1/\";\r\n"); + scriptBuilder.append("declare_ns prov = \"http://www.openarchives.org/OAI/2.0/provenance\";\r\n"); + // header + scriptBuilder.append("dri:objIdentifier = xpath:\"//dri:objIdentifier\";\r\n"); + scriptBuilder.append("dri:recordIdentifier = xpath:\"//csvRecord/row[@name='UT']\";\r\n"); + scriptBuilder.append("dri:dateOfCollection = xpath:\"//dri:dateOfCollection\";\r\n"); +// scriptBuilder.append("dri:repositoryId;\r\n"); +// scriptBuilder.append("dri:datasourceprefix;\r\n"); + // metadata + scriptBuilder.append("dc:language = Convert(xpath:\"//csvRecord/row[@name='LA']\", LangVocab);\r\n"); + scriptBuilder.append("dc:title = xpath:\"//csvRecord/row[@name='TI']\";\r\n"); + scriptBuilder.append("%myTemplate = split(xpath:\"//csvRecord/row[@name='AF']/text()\", \"dc:creator\", \";\");\r\n"); + scriptBuilder.append("$varIssn = xpath:\"//csvRecord/row[@name='SN']\";"); + scriptBuilder.append("oaf:journal = set(xpath:\"//csvRecord/row[@name='SO']\", @issn = $varIssn;);\r\n"); +// scriptBuilder.append("dr:CobjCategory = 
Convert(xpath:\"//csvRecord/row[@name='DT']\", TextTypologies);\r\n"); + scriptBuilder.append("dr:CobjCategory = Convert(xpath:\"//csvRecord/row[@name='DT']\", LangVocab);\r\n"); + scriptBuilder.append("dc:subject = xpath:\"//csvRecord/row[@name='ID']\";\r\n"); + scriptBuilder.append("dc:description = xpath:\"//csvRecord/row[@name='AB']\";\r\n"); + scriptBuilder.append("dc:publisher = xpath:\"//csvRecord/row[@name='PU']\";\r\n"); + scriptBuilder.append("dc:dateAccepted = xpath:\"//csvRecord/row[@name='PY']\";\r\n"); + scriptBuilder.append("$varDoi = xpath:\"concat('http://dx.doi.org/', normalize-space(//csvRecord/row[@name='DI']))\";\r\n"); + scriptBuilder.append("dc:identifier = $varDoi;\r\n"); + scriptBuilder.append("$varPart1 = xpath:\"concat('Test', 'No.2')\";\r\n"); + scriptBuilder.append("dc:subject = $varPart1;\r\n"); + scriptBuilder.append("$varIfTest = xpath:\"//dc:creator\";\r\n"); + scriptBuilder.append("if xpath:\"count($varIfTest) > 0\" dc:subject = \"'yes'\"; else dc:subject = \"'no'\";\r\n"); + scriptBuilder.append("oaf:identifier = set(xpath:\"//csvRecord/row[@name='DI']\", @identifierType = \"doi\";);\r\n"); + scriptBuilder.append("oaf:fundingunit = xpath:\"//csvRecord/row[@name='FU']\";\r\n"); + scriptBuilder.append("oaf:fundingtext = xpath:\"//csvRecord/row[@name='FX']\";\r\n"); + // adapt the attribute values for name and id + scriptBuilder.append("oaf:hostedBy = set(\"''\", @name=\"Unknown Repository\";, @id=\"openaire____::55045bd2a65019fd8e6741a755395c8c\";);\r\n"); + scriptBuilder.append("oaf:collectedFrom = set(\"''\", @name=\"Unknown Repository\";, @id=\"openaire____::55045bd2a65019fd8e6741a755395c8c\";);\r\n"); + + scriptBuilder.append("end\r\n"); + return scriptBuilder.toString(); + } + + private String getFunderTransformationScript(){ + StringBuilder scriptBuilder = new StringBuilder(); + + scriptBuilder.append("declare_script \"MainSample\";\r\n"); + scriptBuilder.append("declare_ns oaf = \"http://namespace.openaire.eu/oaf\";\r\n"); + 
scriptBuilder.append("declare_ns dr = \"http://www.driver-repository.eu/namespace/dr\";\r\n"); + scriptBuilder.append("declare_ns dri = \"http://www.driver-repository.eu/namespace/dri\";\r\n"); + scriptBuilder.append("declare_ns dc = \"http://purl.org/dc/elements/1.1/\";\r\n"); + scriptBuilder.append("declare_ns prov = \"http://www.openarchives.org/OAI/2.0/provenance\";\r\n"); +// scriptBuilder.append("dc:dateAccepted = Convert(xpath:\"//dc:date\", DateISO8601);\r\n"); + //scriptBuilder.append("dc:creator = Convert(xpath:\"descendant-or-self::dc:date\", DateISO8601, \"yyyy-MM-dd\", \"min()\");\r\n"); + //scriptBuilder.append("dc:creator = xpath:\"//dc:creator\";\r\n"); + scriptBuilder.append("apply xpath:\"//dc:date\" if xpath:\"starts-with(normalize-space(.), 'info:eu-repo/date')\" oaf:embargoenddate = RegExpr(xpath:\"normalize-space(.)\", $var0, \"s/^(.*info:eu-repo\\/date\\/embargoEnd\\/)//g\"); else $var0 = \"''\";\r\n"); // retrieve from dc:date , test the info prefix + scriptBuilder.append("apply xpath:\"//dc:relation\" if xpath:\"starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')\" oaf:projectid = RegExpr(xpath:\"normalize-space(.)\", $var0, \"/info:eu-repo/grantAgreement/([A-Za-z]+)/(.*)/([0-9]+)/\"); else dc:relation = xpath:\"normalize-space(.)\";\r\n"); // retrieve from dc:relation , test the info prefix +// scriptBuilder.append("apply xpath:\"//dc:rights\" if xpath:\"starts-with(normalize-space(.), 'info:eu-repo/semantics')\" dc:rights = empty; else dc:rights = xpath:\"normalize-space(.)\";\r\n"); // retrieve from dc:rights, better retrieve from vocabulary + scriptBuilder.append("oaf:accessrights = Convert(xpath:\"//dc:rights\", RightsVocab);\r\n"); + scriptBuilder.append("oaf:datasourceid = getValue(PROFILEFIELD, [xpath:\"concat('collection(')\", xpath:\"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value\"]);\r\n"); // retrieve from profile + scriptBuilder.append("static $varDsType = getValue(PROFILEFIELD, 
[xpath:\"concat('collection(')\", xpath:\"//EXTRA_FIELDS/FIELD[key='DataSourceType']/value\"]);\r\n"); + scriptBuilder.append("if xpath:\"$varDsType='Aggregator'\" oaf:hostingDatasourceid = xpath:\"//prov:baseURL\"; else oaf:hostingDatasourceid = getValue(PROFILEFIELD, [xpath:\"//dri:repositoryId\", xpath:\"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value\"]);\r\n"); + scriptBuilder.append("oaf:collectedDatasourceid = getValue(PROFILEFIELD, [xpath:\"concat('collection(')\", xpath:\"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value\"]);\r\n"); + scriptBuilder.append("dr:CobjMDFormats = xpath:\"//dc:title\";\r\n"); + scriptBuilder.append("dc:language = Convert(xpath:\"//dc:language\", LangVocab);\r\n"); + scriptBuilder.append("%myTemplate = split(xpath:\"//dc:creator/text()\", \"dc:creator\", \";\");\r\n"); + scriptBuilder.append("dr:repositoryName = getValue(PROFILEFIELD, [xpath:\"concat('collection(')\", xpath:\"//CONFIGURATION/OFFICIAL_NAME\"]);\r\n"); + //scriptBuilder.append("if xpath:\"//dc:title\" dr:aggregatorName = \"abc\"; else dr:repositoryName = \"def\";\r\n"); + //scriptBuilder.append("if xpath:\"//dc:language[4]\" dr:CobjTypology = xpath:\"//dc:language[2]\"; else dr:CobjTypology = \"test\";\r\n"); + scriptBuilder.append("apply xpath:\"//dc:identifier\" if xpath:\"starts-with(normalize-space(.), 'http')\" dc:identifier = xpath:\"normalize-space(.)\"; else dr:CobjIdentifier = xpath:\"normalize-space(.)\";\r\n"); + //scriptBuilder.append("dc:language = Convert(xpath:\"//dc:language[2]\",LangVocab);\r\n"); + scriptBuilder.append("$var0 = \"''\";\r\n"); // workaround - placeholder for an empty string + scriptBuilder.append("$varPmc = \"'PMC:123456'\";\r\n"); + scriptBuilder.append("oaf:identifier = set($varPmc, @identifierType = \"pmc\";);"); + scriptBuilder.append("end\r\n"); + + return scriptBuilder.toString(); + } + + private String getOpenaireplusCompatibleFunderTransformationScript(){ + StringBuilder scriptBuilder = new StringBuilder(); + + 
scriptBuilder.append("declare_script \"MainSample\";\r\n");
+        scriptBuilder.append("declare_ns oaf = \"http://namespace.openaire.eu/oaf\";\r\n");
+        scriptBuilder.append("declare_ns dr = \"http://www.driver-repository.eu/namespace/dr\";\r\n");
+        scriptBuilder.append("declare_ns dri = \"http://www.driver-repository.eu/namespace/dri\";\r\n");
+        scriptBuilder.append("declare_ns dc = \"http://purl.org/dc/elements/1.1/\";\r\n");
+        scriptBuilder.append("declare_ns prov = \"http://www.openarchives.org/OAI/2.0/provenance\";\r\n");
+//        scriptBuilder.append("dc:dateAccepted = Convert(xpath:\"//dc:date\", DateISO8601);\r\n");
+        //scriptBuilder.append("dc:creator = Convert(xpath:\"descendant-or-self::dc:date\", DateISO8601, \"yyyy-MM-dd\", \"min()\");\r\n");
+        //scriptBuilder.append("dc:creator = xpath:\"//dc:creator\";\r\n");
+        scriptBuilder.append("apply xpath:\"//dc:date\" if xpath:\"starts-with(normalize-space(.), 'info:eu-repo/date')\" oaf:embargoenddate = RegExpr(xpath:\"normalize-space(.)\", $var0, \"s/^(.*info:eu-repo\\/date\\/embargoEnd\\/)//g\"); else $var0 = \"''\";\r\n"); // retrieve from dc:date , test the info prefix
+//        String regExpr = "s/^.*info:eu-repo\\/grantAgreement\\/EC\\/FP7\\/([0-9]+).*/$1/gm";
+//        scriptBuilder.append("$varPrj0 = RegExpr(xpath:\"//dc:relation[0][starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')]\", $var0, \"s/^.*info:eu-repo\\/grantAgreement\\/EU\\/FP7\\/([0-9]+)//gm\");");
+        // The "$1" back-reference lives inside the script text ($varCorda, $varArg), so no
+        // Java-side copy of the pattern or the replacement is kept here.
+        // NOTE(review): how RegExpr interprets its second argument is defined outside this file - confirm.
+        scriptBuilder.append("$varCorda = \"'corda_______::$1'\";\r\n");
+
+        scriptBuilder.append("$varArg = \"'$1'\";\r\n");
+        scriptBuilder.append("$varPrj1 = "
+                +"RegExpr(xpath:\"//dc:relation[1][starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')]\", "
+                +"$varCorda, \"s/^.*info:eu-repo\\/grantAgreement\\/EC\\/FP7\\/(\\d\\d\\d\\d\\d\\d).*//gm\");\r\n");
+        scriptBuilder.append("$varPrj2 = "
+                
+"RegExpr(xpath:\"//dc:relation[2][starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')]\", " + +"$varCorda, \"s/^.*info:eu-repo\\/grantAgreement\\/EC\\/FP7\\/(\\d\\d\\d\\d\\d\\d).*//gm\");\r\n"); + scriptBuilder.append("$varPrj3 = " + +"RegExpr(xpath:\"//dc:relation[3][starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')]\", " + +"$varCorda, \"s/^.*info:eu-repo\\/grantAgreement\\/EC\\/FP7\\/(\\d\\d\\d\\d\\d\\d).*//gm\");\r\n"); + scriptBuilder.append("$varPrj4 = " + +"RegExpr(xpath:\"//dc:relation[4][starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')]\", " + +"$varCorda, \"s/^.*info:eu-repo\\/grantAgreement\\/EC\\/FP7\\/(\\d\\d\\d\\d\\d\\d).*//gm\");\r\n"); + scriptBuilder.append("$varPrj5 = " + +"RegExpr(xpath:\"//dc:relation[5][starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')]\", " + +"$varCorda, \"s/^.*info:eu-repo\\/grantAgreement\\/EC\\/FP7\\/(\\d\\d\\d\\d\\d\\d).*//gm\");\r\n"); + scriptBuilder.append("$varPrj6 = " + +"RegExpr(xpath:\"//dc:relation[6][starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')]\", " + +"$varCorda, \"s/^.*info:eu-repo\\/grantAgreement\\/EC\\/FP7\\/(\\d\\d\\d\\d\\d\\d).*//gm\");\r\n"); + scriptBuilder.append("if xpath:\"string-length($varPrj1) = 20\" oaf:projectid = $varPrj1; else $var0 = \"''\";\r\n"); + scriptBuilder.append("if xpath:\"string-length($varPrj2) = 20\" oaf:projectid = $varPrj2; else $var0 = \"''\";\r\n"); + scriptBuilder.append("if xpath:\"string-length($varPrj3) = 20\" oaf:projectid = $varPrj3; else $var0 = \"''\";\r\n"); + scriptBuilder.append("if xpath:\"string-length($varPrj4) = 20\" oaf:projectid = $varPrj4; else $var0 = \"''\";\r\n"); + scriptBuilder.append("if xpath:\"string-length($varPrj5) = 20\" oaf:projectid = $varPrj5; else $var0 = \"''\";\r\n"); + scriptBuilder.append("if xpath:\"string-length($varPrj6) = 20\" oaf:projectid = $varPrj6; else $var0 = \"''\";\r\n"); +// scriptBuilder.append("apply 
xpath:\"//dc:relation[starts-with(., 'info:eu-repo/grantAgreement')]\" if xpath:\"string-length() = 6\" oaf:projectid = RegExpr(xpath:\"normalize-space(.)\", $var0, \"/info:eu-repo/grantAgreement/([A-Za-z]+)/(.*)/([0-9]+)/\"); else dc:relation = xpath:\"normalize-space(.)\";\r\n"); // retrieve from dc:relation , test the info prefix +// scriptBuilder.append("apply xpath:\"//dc:relation\" if xpath:\"starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')\" oaf:projectid = RegExpr(xpath:\"normalize-space(.)\", $var0, \"/info:eu-repo/grantAgreement/([A-Za-z]+)/(.*)/([0-9]+)/\"); else dc:relation = xpath:\"normalize-space(.)\";\r\n"); // retrieve from dc:relation , test the info prefix +// scriptBuilder.append("apply xpath:\"//dc:rights\" if xpath:\"starts-with(normalize-space(.), 'info:eu-repo/semantics')\" dc:rights = empty; else dc:rights = xpath:\"normalize-space(.)\";\r\n"); // retrieve from dc:rights, better retrieve from vocabulary + scriptBuilder.append("oaf:accessrights = Convert(xpath:\"//dc:rights\", RightsVocab);\r\n"); + scriptBuilder.append("oaf:datasourceid = getValue(PROFILEFIELD, [xpath:\"concat('collection()', '')\", xpath:\"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value\"]);\r\n"); // retrieve from profile + scriptBuilder.append("static $varDsType = getValue(PROFILEFIELD, [xpath:\"concat('collection()', '')\", xpath:\"//EXTRA_FIELDS/FIELD[key='DataSourceType']/value\"]);\r\n"); + scriptBuilder.append("if xpath:\"$varDsType='Aggregator'\" oaf:hostingDatasourceid = xpath:\"//prov:baseURL\"; else oaf:hostingDatasourceid = getValue(PROFILEFIELD, [xpath:\"concat('collection()', '')\", xpath:\"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value\"]);\r\n"); + scriptBuilder.append("oaf:collectedDatasourceid = getValue(PROFILEFIELD, [xpath:\"concat('collection()', '')\", xpath:\"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value\"]);\r\n"); + scriptBuilder.append("dr:CobjMDFormats = xpath:\"//dc:title\";\r\n"); + 
scriptBuilder.append("dc:language = Convert(xpath:\"//dc:language\", LangVocab);\r\n");
+        scriptBuilder.append("%myTemplate = split(xpath:\"//dc:creator/text()\", \"dc:creator\", \";\");\r\n");
+        scriptBuilder.append("dr:repositoryName = getValue(PROFILEFIELD, [xpath:\"concat('collection()', '')\", xpath:\"//CONFIGURATION/OFFICIAL_NAME\"]);\r\n");
+        //scriptBuilder.append("if xpath:\"//dc:title\" dr:aggregatorName = \"abc\"; else dr:repositoryName = \"def\";\r\n");
+        //scriptBuilder.append("if xpath:\"//dc:language[4]\" dr:CobjTypology = xpath:\"//dc:language[2]\"; else dr:CobjTypology = \"test\";\r\n");
+        scriptBuilder.append("apply xpath:\"//dc:identifier\" if xpath:\"starts-with(normalize-space(.), 'http')\" dc:identifier = xpath:\"normalize-space(.)\"; else dr:CobjIdentifier = xpath:\"normalize-space(.)\";\r\n");
+        //scriptBuilder.append("dc:language = Convert(xpath:\"//dc:language[2]\",LangVocab);\r\n");
+        scriptBuilder.append("$var0 = \"''\";\r\n"); // workaround - placeholder for an empty string
+        scriptBuilder.append("$varPmc = \"'PMC:123456'\";\r\n");
+        scriptBuilder.append("oaf:identifier = set($varPmc, @identifierType = \"pmc\";);");
+        scriptBuilder.append("end\r\n");
+
+        return scriptBuilder.toString();
+    }
+
+
+    /**
+     * Concatenates the given serialized records, in list order, between the
+     * leading and trailing wrapper strings appended before and after the loop.
+     *
+     * @param mdRecords the serialized records to join; must not be {@code null}
+     *        (an empty list yields just the two wrapper strings)
+     * @return the wrapper strings with all records appended in between
+     */
+    private String getObjectRecord(List<String> mdRecords){
+        StringBuilder builder = new StringBuilder();
+        builder.append("");
+        // Typed as List<String>: iterating a raw List with a String loop variable does not compile.
+        for (String record : mdRecords) {
+            builder.append(record);
+        }
+        builder.append("");
+        return builder.toString();
+    }
+
+    private String getMdRecord(String objIdentifier, String recordIdentifier, String metadata){
+        StringBuilder builder = new StringBuilder();
+        builder.append("");
+        builder.append("
"); + builder.append(""); + builder.append(objIdentifier); + builder.append(""); + builder.append("profile-123"); + builder.append(""); + builder.append(recordIdentifier); + builder.append(""); + builder.append("2009-09-30T13:08:57Z"); + builder.append(""); + builder.append(""); + builder.append("71f5069a-9ea2-41fa-968a-4f69a5722ad0_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU="); + builder.append("
"); + builder.append(metadata); + builder.append(""); + builder.append("
"); + return builder.toString(); + } + + private String getMdRecordWithProvenance(String objIdentifier, String recordIdentifier, String metadata, String provenance){ + StringBuilder builder = new StringBuilder(); + builder.append(""); + builder.append("
"); + builder.append(""); + builder.append(objIdentifier); + builder.append(""); + builder.append("profile-123"); + builder.append(""); + builder.append(recordIdentifier); + builder.append(""); + builder.append("2009-09-30T13:08:57Z"); + builder.append(""); + builder.append(""); + builder.append("71f5069a-9ea2-41fa-968a-4f69a5722ad0_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU="); + builder.append("
"); + builder.append(metadata); + builder.append(""); + builder.append(""); + builder.append(provenance); + builder.append(""); + builder.append("
"); + return builder.toString(); + } + + private String getDeletedMdRecord(String objIdentifier, String recordIdentifier){ + StringBuilder builder = new StringBuilder(); + builder.append(""); + builder.append("
"); + builder.append(""); + builder.append(objIdentifier); + builder.append(""); + builder.append("profile-123"); + builder.append(""); + builder.append(recordIdentifier); + builder.append(""); + builder.append("2009-09-30T13:08:57Z"); + builder.append(""); + builder.append(""); + builder.append("71f5069a-9ea2-41fa-968a-4f69a5722ad0_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU="); + builder.append("
"); + builder.append("
"); + return builder.toString(); + } + + private String getEmptyMetadataMdRecord(String objIdentifier, String recordIdentifier){ + // this is an exception case + StringBuilder builder = new StringBuilder(); + builder.append(""); + builder.append("
"); + builder.append(""); + builder.append(objIdentifier); + builder.append(""); + builder.append(""); + builder.append(recordIdentifier); + builder.append(""); + builder.append("2011-03-29T08:41:48Z"); + builder.append("profile-123"); + builder.append("oai:openaire.cern.ch:8"); + builder.append("2010-12-11T19:14:26Z"); + builder.append("EC_fundedresources"); + builder.append("
"); + builder.append(""); + builder.append(""); + builder.append("
"); + return builder.toString(); + } + + private String getDC(){ + StringBuilder builder = new StringBuilder(); + builder.append(""); + builder.append("SomeTitle"); + builder.append("Irgendein Titel"); + //builder.append("SomeTitle"); + builder.append("firstLang"); + builder.append(" \r\n" + "middleLang "); + builder.append("lastLang"); + builder.append("Any Author"); + builder.append("First Contributor"); + builder.append("Second Contributor"); + builder.append("9"); + builder.append("application/pdf"); + builder.append("digital"); + builder.append("dc"); + builder.append(" http://somehost "); + builder.append("urn:nbn:123-456"); + builder.append("4-9c7cf682-849b-48bd-92cf-e65367f38e14_TURTdG9yZURTUmVzb3VyY2VzL01EU3RvcmVEU1Jlc291cmNlVHlwZQ=="); + builder.append("someType"); + builder.append("info:eu-repo/semantics/article"); + builder.append(""); + return builder.toString(); + } + + private String getDC2(){ + StringBuilder builder = new StringBuilder(); + builder.append(""); + builder.append("SomeTitle"); + //builder.append("SomeTitle"); + builder.append("firstLang"); + builder.append("middleLang"); + builder.append("lastLang"); + builder.append("Any Author"); + builder.append("First Contributor"); + builder.append("Second Contributor"); + builder.append(" http://somehost "); + builder.append("urn:nbn:123-456"); + builder.append("4-9c7cf682-849b-48bd-92cf-e65367f38e14_TURTdG9yZURTUmVzb3VyY2VzL01EU3RvcmVEU1Jlc291cmNlVHlwZQ=="); + builder.append("someType"); + builder.append("info:eu-repo/semantics/article"); + builder.append(""); + return builder.toString(); + } + + private String getDidl(){ + StringBuilder builder = new StringBuilder(); + builder.append(""); + builder.append(""); + builder.append(""); + builder.append(""); + builder.append("info:eu-repo/semantics/humanStartPage"); + builder.append(""); + builder.append(""); + builder.append(""); + builder.append(""); + + return builder.toString(); + } + + private String getWOS(){ + StringBuilder builder = new 
StringBuilder(); + builder.append("" + + "
" + + " ::00a3e38eff10c4f2f35ffde55ee22e63" + + " WOS:000298601300043" + + " 2013-10-29T10:25:51+01:00" + + " " + + " " + + "
" + + " " + + " " + + " true" + + " J" + + " Punta, M; Coggill, PC; Eberhardt, RY; Mistry, J; Tate, J; Boursnell, C; Pang, N;Forslund, K; Ceric, G; Clements, J; Heger, A; Holm, L; Sonnhammer, ELL; Eddy, SR; Bateman, A; Finn, RD" + + " Punta, Marco; Coggill, Penny C.; Eberhardt, Ruth Y.; Mistry, Jaina; Tate, John; Boursnell, Chris; Pang, Ningze; Forslund, Kristoffer; Ceric, Goran; Clements, Jody; Heger, Andreas; Holm, Liisa; Sonnhammer, Erik L. L.; Eddy, Sean R.; Bateman, Alex; Finn, Robert D." + + " The Pfam protein families database" + + " NUCLEIC ACIDS RESEARCH" + + " English" + + " Article" + + " CRYSTAL-STRUCTURE; DOMAIN; IDENTIFICATION; ANNOTATION; HOMOLOGY; CAPSULE; REVEALS; SEARCH" + + " Pfam is a widely used database of protein families, currently containing more than 13 000 manually curated protein families as of release 26.0. Pfam is available via servers in the UK (http://pfam.sanger.ac.uk/), the USA (http://pfam.janelia.org/) and Sweden" + + " (http://pfam.sbc.su.se/). Here, we report on changes that have occurred since our 2010 NAR paper (release 24.0). Over the last 2 years, we have generated 1840 new families and" + + " increased coverage of the UniProt Knowledgebase (UniProtKB) to nearly 80%. Notably, we have" + + " taken the step of opening up the annotation of our families to the Wikipedia community, by" + + " linking Pfam families to relevant Wikipedia pages and encouraging the Pfam and Wikipedia" + + " communities to improve and expand those pages. We continue to improve the Pfam website and" + + " add new visualizations, such as the 'sunburst' representation of taxonomic distribution of" + + " families. In this work we additionally address two topics that will be of particular" + + " interest to the Pfam community. First, we explain the definition and use of family-specific," + + " manually curated gathering thresholds. 
Second, we discuss some of the features of domains of" + + " unknown function (also known as DUFs), which constitute a rapidly growing class of families" + + " within Pfam." + + " [Punta, Marco; Coggill, Penny C.; Eberhardt, Ruth Y.; Mistry, Jaina; Tate, John;" + + " Boursnell, Chris; Pang, Ningze; Bateman, Alex] Wellcome Trust Sanger Inst, Hinxton CB10 1SA," + + " England; [Forslund, Kristoffer; Sonnhammer, Erik L. L.] Stockholm Univ, Dept Biochem &" + + " Biophys, Sci Life Lab, Swedish eSci Res Ctr,Stockholm Bioinformat Ctr, SE-17121 Solna," + + " Sweden; [Ceric, Goran; Clements, Jody; Eddy, Sean R.; Finn, Robert D.] HHMI Janelia Farm Res" + + " Campus, Ashburn, VA 20147 USA; [Heger, Andreas] Univ Oxford, MRC Funct Genom Unit, Dept" + + " Physiol Anat & Genet, Oxford OX1 3QX, England; [Holm, Liisa] Univ Helsinki, Inst" + + " Biotechnol, Helsinki 00014, Finland; [Holm, Liisa] Univ Helsinki, Dept Biol & Environm" + + " Sci, FIN-00014 Helsinki, Finland" + + " Punta, M (reprint author), Wellcome Trust Sanger Inst, Wellcome Trust Genome" + + " Campus, Hinxton CB10 1SA, England." + + " mp13@sanger.ac.uk" + + " Wellcome Trust [WT077044/Z/05/Z]; BBSRC [BB/F010435/1]; Howard Hughes Medical" + + " Institute; Stockholm University; Royal Institute of Technology; Swedish Natural Sciences" + + " Research Council" + + " Wellcome Trust (grant numbers WT077044/Z/05/Z); BBSRC Bioinformatics and" + + " Biological Resources Fund (grant numbers BB/F010435/1); Howard Hughes Medical Institute (to" + + " G. C., J.C., S. R. E and R. D. F.); Stockholm University, Royal Institute of Technology and" + + " the Swedish Natural Sciences Research Council (to K. F. and E. L. L. S.) and Systems, Web" + + " and Database administration teams at Wellcome Trust Sanger Institute (WTSI) (infrastructure" + + " support). Funding for open access charge: Wellcome Trust (grant numbers WT077044/Z/05/Z);" + + " BBSRC Bioinformatics and Biological Resources Fund (grant numbers BB/F010435/1)." 
+ + " 29" + + " 92" + + " 94" + + " OXFORD UNIV PRESS" + + " OXFORD " + + " GREAT CLARENDON ST, OXFORD OX2 6DP, ENGLAND" + + " 0305-1048" + + " NUCLEIC ACIDS RES" + + " Nucleic Acids Res." + + " JAN" + + " 2012" + + " 40" + + " D1" + + " D290" + + " D301" + + " 10.1093/nar/gkr1065 " + + "12" + + " Biochemistry & Molecular Biology" + + " Biochemistry & Molecular Biology" + + " 869MD" + + " WOS:000298601300043" + + " " + + " " + + "
"); + return builder.toString(); + } + + private String getOAFDC(){ + StringBuilder builder = new StringBuilder(); + builder.append( + "" + + "Grass roots lobbying: marketing politics and policy 'beyond the Beltway'" + + "McGrath, Conor; Xyz, Opq" + + "Abc, Def" + + "Muñoz-Castellanos, L" + + "JA Political science (General)" + + "" + + "" + + "2011" + + "info:eu-repo/date/embargoEnd/2011-05-12" + + "2004-03-15" + + "2009-02-24T13:27:42Z" + + "2009-02" + + "2009" + + "Conference or Workshop Item" + + "NonPeerReviewed" + + "info:eu-repo/semantics/article" + + "http://somehost" + + "application/pdf" + + "info:eu-repo/grantAgreement/EC/FP7/241479" + + "http://sherpa.bl.uk/1/01/PMMcgrath.pdf" + + "info:eu-repo/grantAgreement/EC/FP7/246682/EU/Towards a 10-Year Vision for Global Research Data Infrastructures/GRDI2020" + + "info:eu-repo/grantAgreement/EC/FP7/PITN-GA-2009-237252" + + "info:eu-repo/grantAgreement/EC/FP7/PITN-GA-2009-235114" + + "info:eu-repo/grantAgreement/EC/FP7/237252" + + "http://dx.doi.org/10.1103/PhysRevLett.104.126402" + + "Tots els drets reservats" + + "Used by permission of the publisher" + + "info:eu-repo/semantics/openAccess " + + ""); + return builder.toString(); + } + + private String getProvenance(){ + StringBuilder builder = new StringBuilder(); + builder.append( + "" + + "" + + "http://dspace.library.uu.nl:8080/dspace-oai/request" + + "oai:dspace.library.uu.nl:1874/218065" + + "2012-01-19T12:38:56Z" + + "http://www.loc.gov/mods/v3" + + "" + + ""); + return builder.toString(); + + } + +} diff --git a/dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/engine/StylesheetTest.java b/dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/engine/StylesheetTest.java new file mode 100644 index 0000000..0415579 --- /dev/null +++ b/dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/engine/StylesheetTest.java @@ -0,0 +1,84 @@ +/** + * + */ +package 
eu.dnetlib.data.collective.transformation.engine; + +import java.io.StringReader; + +import javax.xml.transform.TransformerConfigurationException; + +import org.junit.Before; +import org.junit.Test; +import org.springframework.core.io.ClassPathResource; +import org.springframework.core.io.Resource; + +import eu.dnetlib.data.collective.transformation.engine.core.TransformationImpl; +import eu.dnetlib.data.collective.transformation.engine.functions.ProcessingException; +import eu.dnetlib.data.collective.transformation.rulelanguage.RuleLanguageParser; + +/** + * @author jochen + * + */ +public class StylesheetTest { + + private static final String xslTemplatePath_oaf = "eu/dnetlib/data/collective/transformation/engine/oaftemplate.xsl"; // OpenAIRE specific + private transient Resource xslTemplateResource = new ClassPathResource(xslTemplatePath_oaf); // OpenAIRE specific + private static final String schemaPath_oaf = "eu/dnetlib/data/collective/transformation/schema/OAFSchema_vTransformator.xsd"; // OpenAIRE specific + private transient Resource schemaResource = new ClassPathResource(schemaPath_oaf); // OpenAIRE specific + + private transient TransformationImpl transformation; + + @SuppressWarnings("unchecked") + @Before + public void setUp() throws TransformerConfigurationException, ProcessingException{ + System.setProperty("javax.xml.transform.TransformerFactory", "net.sf.saxon.TransformerFactoryImpl"); + + transformation = new TransformationImpl(); + transformation.setTemplate(xslTemplateResource); + transformation.setSchema(schemaResource); + transformation.init(); + transformation.setRuleLanguageParser(getRuleLanguageParser(getTransformationScript())); + //transformation.setRootElement("record"); + transformation.configureTransformation(); + + } + + @Test + public void testStylesheetSkip(){ + System.out.println(transformation.dumpStylesheet()); + } + + private String getTransformationScript(){ + StringBuilder scriptBuilder = new StringBuilder(); + 
scriptBuilder.append("declare_script \"MainSample\";\r\n"); + scriptBuilder.append("declare_ns dr = \"http://www.driver-repository.eu/namespace/dr\";\r\n"); + scriptBuilder.append("declare_ns dri = \"http://www.driver-repository.eu/namespace/dri\";\r\n"); + scriptBuilder.append("declare_ns dc = \"http://purl.org/dc/elements/1.1/\";\r\n"); + scriptBuilder.append("declare_ns oaf = \"http://namespace.openaire.eu/oaf\";\r\n"); + + //scriptBuilder.append("$a1 = Convert(xpath:\"//dc:language[2]\",LangVocab);\r\n"); + //scriptBuilder.append("dri:mdFormat = $a1;\r\n"); + scriptBuilder.append("dr:CobjMDFormats = xpath:\"//dc:title\";\r\n"); + scriptBuilder.append("dr:repositoryName = getValue(PROFILEFIELD, [xpath:\"//dri:repositoryId\", xpath:\"//CONFIGURATION/OFFICIAL_NAME\"]);\r\n"); + scriptBuilder.append("dc:title = copy(\"dc:title\", \"//dc:title\", \"@*|node()\");\r\n"); + //scriptBuilder.append("if xpath:\"//dc:title\" dr:aggregatorName = \"abc\"; else dr:repositoryName = \"def\";\r\n"); + scriptBuilder.append("if xpath:\"//dc:language[4]\" dr:CobjTypology = xpath:\"//dc:language[2]\"; else dr:CobjTypology = \"test\";\r\n"); + scriptBuilder.append("apply xpath:\"//dc:identifier\" if xpath:\"starts-with(normalize-space(.), 'http')\" dc:identifier = xpath:\"normalize-space(.)\"; else dr:CobjIdentifier = xpath:\"normalize-space(.)\";\r\n"); + //scriptBuilder.append("dc:language = Convert(xpath:\"//dc:language[2]\",LangVocab);\r\n"); + scriptBuilder.append("dc:language = Convert(xpath:\"normalize-space(//dc:language[2])\",LangVocab);\r\n"); + scriptBuilder.append("$var0 = \"''\";\r\n"); + scriptBuilder.append("if xpath:\"//dc:format[text()='digital']\" dc:publisher = xpath:\"//dc:publisher\"; else dc:publisher = skipRecord();\r\n"); + scriptBuilder.append("end\r\n"); + + return scriptBuilder.toString(); + } + private RuleLanguageParser getRuleLanguageParser(String aTransformationScript){ + RuleLanguageParser parser = new RuleLanguageParser(); + 
System.out.println(aTransformationScript); + StringReader reader = new StringReader(aTransformationScript); + parser.parse(reader); + return parser; + } + +} diff --git a/dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/engine/core/TransformationImplTest.java b/dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/engine/core/TransformationImplTest.java new file mode 100644 index 0000000..bc713d7 --- /dev/null +++ b/dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/engine/core/TransformationImplTest.java @@ -0,0 +1,98 @@ +package eu.dnetlib.data.collective.transformation.engine.core; + +import java.io.StringReader; + +import javax.xml.transform.TransformerConfigurationException; +import javax.xml.transform.TransformerException; + +import org.junit.Before; +import org.junit.Test; +import org.springframework.core.io.ClassPathResource; +import org.springframework.core.io.Resource; + +import eu.dnetlib.data.collective.transformation.rulelanguage.RuleLanguageParser; + +public class TransformationImplTest { + + private static final String xslTemplatePath = "eu/dnetlib/data/collective/transformation/engine/template.xsl"; + private transient Resource xslTemplateResource = new ClassPathResource(xslTemplatePath); +// private static final String schemaPath = "eu/dnetlib/data/collective/transformation/schema/DMFSchema_vTransformator.xsd"; + private static final String schemaPath = "eu/dnetlib/data/collective/transformation/schema/OAFSchema_vTransformator.xsd"; + private transient Resource schemaResource = new ClassPathResource(schemaPath); + private transient RuleLanguageParser parser; + private static final String dataSinkId = "132"; + + private transient TransformationImpl transformation; + + @Before + public void setUp(){ + System.setProperty("javax.xml.transform.TransformerFactory", "net.sf.saxon.TransformerFactoryImpl"); + + parser = new RuleLanguageParser(); + + transformation = new 
TransformationImpl(); + transformation.addJobConstant(TransformationImpl.JOBCONST_DATASINKID, dataSinkId); + transformation.setTemplate(xslTemplateResource); + transformation.setSchema(schemaResource); + transformation.init(); + transformation.setRuleLanguageParser(parser); + + } + + @Test + public void testStyleSheet() throws TransformerConfigurationException{ + StringReader reader = new StringReader(dc2DmfScript()); + parser.parse(reader); + try{ + transformation.configureTransformation(); + }catch(TransformerConfigurationException e){ + e.printStackTrace(); + } + System.out.println(transformation.dumpStylesheet()); + } + + private String dc2DmfScript(){ + StringBuilder scriptBuilder = new StringBuilder(); + scriptBuilder.append("declare_script \"MainSample\";\r\n"); + + scriptBuilder.append("declare_ns oaf = \"http://namespace.openaire.eu/oaf\";\r\n"); + scriptBuilder.append("declare_ns dr = \"http://www.driver-repository.eu/namespace/dr\";\r\n"); + scriptBuilder.append("declare_ns dri = \"http://www.driver-repository.eu/namespace/dri\";\r\n"); + scriptBuilder.append("declare_ns dc = \"http://purl.org/dc/elements/1.1/\";\r\n"); + + scriptBuilder.append("$var0 = \"''\";\r\n"); + scriptBuilder.append("static $var1 = RegExpr($job.datasinkid, $var0, \"s/^(dnet:\\/\\/MDStoreDS\\/)|(\\?.*)//g\");\r\n"); + scriptBuilder.append("dr:objectIdentifier = xpath:\"//dri:objIdentifier\";\r\n"); + scriptBuilder.append("dr:dateOfCollection = getValue(CURRENTDATE, []);\r\n"); + scriptBuilder.append("dr:CobjContentSynthesis = empty;\r\n"); + scriptBuilder.append("dr:CobjTypology = \"Textual\";\r\n"); + scriptBuilder.append("dr:CobjModel = \"OAI\";\r\n"); + scriptBuilder.append("dr:CobjMdFormats = \"oai_dc\";\r\n"); + scriptBuilder.append("dr:CobjDescriptionSynthesis = empty;\r\n"); + scriptBuilder.append("//dr:aggregatorName = getValue(PROFILEFIELD, [\"transformationmanager-service-profile-id\", xpath:\"//PROPERTY/@key='name'\"]);\r\n"); + 
scriptBuilder.append("dr:aggregatorInstitution = empty;\r\n"); + scriptBuilder.append("dr:repositoryName = getValue(PROFILEFIELD, [xpath:\"//dri:repositoryId\", xpath:\"//CONFIGURATION/OFFICIAL_NAME\"]);\r\n"); + scriptBuilder.append("dr:repositoryLink = getValue(PROFILEFIELD, [xpath:\"//dri:repositoryId\", xpath:\"//REPOSITORY_WEBPAGE\"]);\r\n"); + scriptBuilder.append("dr:repositoryCountry = getValue(PROFILEFIELD, [xpath:\"//dri:repositoryId\", xpath:\"//COUNTRY\"]);\r\n"); + scriptBuilder.append("dr:repositoryInstitution = getValue(PROFILEFIELD, [xpath:\"//dri:repositoryId\", xpath:\"//REPOSITORY_INSTITUTION\"]);\r\n"); + scriptBuilder.append("dc:creator = xpath:\"//dc:creator\";\r\n"); + scriptBuilder.append("dc:title = xpath:\"//dc:title\";\r\n"); + scriptBuilder.append("dc:subject = xpath:\"//dc:subject\";\r\n"); + scriptBuilder.append("dr:CobjCategory = Convert(xpath:\"//dc:type\", TextTypologies);\r\n"); + scriptBuilder.append("%templ = split(xpath:\"/dc:creator\", \"dc:creator\", \";\");\r\n"); + //scriptBuilder.append("dc:language = Convert(xpath:\"//dc:language\", Languages);\r\n"); +// scriptBuilder.append("dc:language = Extract(language);\r\n"); +// scriptBuilder.append("dc:dateAccepted = Convert(xpath:\"//dc:date\", DateISO8601);\r\n"); +// scriptBuilder.append("apply xpath:\"//dc:identifier\" if xpath:\"starts-with(., 'http')\" dc:identifier = xpath:\".\"; else dr:CobjIdentifier = xpath:\".\";\r\n"); + scriptBuilder.append("apply xpath:\"//dc:relation\" if xpath:\"starts-with(., 'http')\" dc:identifier = RegExpr(xpath:\"//dc:relation\", xpath:\"//dc:relation\", \"s/^(.*info:eu-repo\\/grantAgreement\\/EC\\/FP7\\/)//g\"); else dr:CobjIdentifier = xpath:\".\";\r\n"); +// apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:"normalize-space(.)"; + // scriptBuilder.append("dc:publisher = xpath:\"//dc:publisher\";\r\n"); 
+// scriptBuilder.append("dc:source = xpath:\"//dc:source\";\r\n"); +// scriptBuilder.append("dc:contributor = xpath:\"//dc:contributor\";\r\n"); +// scriptBuilder.append("dc:relation = xpath:\"//dc:relation\";\r\n"); +// scriptBuilder.append("dc:description = xpath:\"//dc:description\";\r\n"); + scriptBuilder.append("end\r\n"); + return scriptBuilder.toString(); + } + +} diff --git a/dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/engine/functions/ConvertTest.java b/dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/engine/functions/ConvertTest.java new file mode 100644 index 0000000..c981a5b --- /dev/null +++ b/dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/engine/functions/ConvertTest.java @@ -0,0 +1,98 @@ +package eu.dnetlib.data.collective.transformation.engine.functions; + +import static org.junit.Assert.*; +import static org.mockito.Mockito.*; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; + +import org.dom4j.Document; +import org.dom4j.DocumentException; +import org.dom4j.io.SAXReader; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Mock; + +import eu.dnetlib.common.profile.Resource; +import eu.dnetlib.data.collective.transformation.VocabularyMap; +import eu.dnetlib.data.collective.transformation.VocabularyRegistry; +import org.mockito.junit.MockitoJUnitRunner; + +@RunWith(MockitoJUnitRunner.class) +public class ConvertTest { + + static final String type_vocabulary = "type_vocabulary.xml"; + static final String lang_vocabulary = "lang_vocabulary.xml"; + Convert c; + Vocabulary v_type; + Vocabulary v_lang; + @Mock + private transient VocabularyRegistry vocabularyRegistry; + private transient VocabularyMap vocabularyMapWrapper = new VocabularyMap(); + private transient Map vocabulariesMap = new HashMap(); + + @Before + public void setUp(){ + + c = new Convert(); + v_type = new Vocabulary(); + 
v_type.setCaseSensitive(false); + v_type.setResource(getResource(type_vocabulary)); + v_lang = new Vocabulary(); + v_lang.setCaseSensitive(false); + v_lang.setDelimiter("/"); + v_lang.setResource(getResource(lang_vocabulary)); + v_type.setName("someQuery"); + vocabulariesMap.put(v_type.getVocabularyName(), v_type); + v_lang.setName("someQuery"); + vocabulariesMap.put(v_lang.getVocabularyName(), v_lang); + vocabularyMapWrapper.setMap(vocabulariesMap); + when(vocabularyRegistry.getVocabularies()).thenReturn(vocabularyMapWrapper); + c.setVocabularyRegistry(vocabularyRegistry); + when(vocabularyRegistry.getVocabulary(v_type.getVocabularyName())).thenReturn(v_type); + when(vocabularyRegistry.getVocabulary(v_lang.getVocabularyName())).thenReturn(v_lang); + } + + private Resource getResource(String vocabularyName){ + Resource r = null; + SAXReader reader = new SAXReader(); + Document d; + try { + d = reader.read(this.getClass().getClassLoader().getResourceAsStream(vocabularyName)); + r = new Resource(d); + } catch (DocumentException e) { + e.printStackTrace(); + } + return r; + } + + @Test + public void testTypeEncoding() throws ProcessingException{ + String[] values1 = {"abc"}; + assertEquals("0000", c.executeSingleValue(v_type.getVocabularyName(), Arrays.asList(values1))); + String[] values2 = {"abc", "Aufsatz"}; + assertEquals("0001", c.executeSingleValue(v_type.getVocabularyName(), Arrays.asList(values2))); + //String[] values3 = {"Conference report"}; + String[] values3 = {"Conference or workshop item"}; + assertEquals("0004", c.executeSingleValue(v_type.getVocabularyName(), Arrays.asList(values3))); + + } + + @Test + public void testLangEncoding() throws ProcessingException{ + String[] values1 = {"he"}; + assertEquals("heb", c.executeSingleValue(v_lang.getVocabularyName(), Arrays.asList(values1))); + String[] values2 = {"jkjhh"}; + assertEquals("und", c.executeSingleValue(v_lang.getVocabularyName(), Arrays.asList(values2))); + String[] values3 = {"german"}; + 
assertEquals("deu/ger", c.executeSingleValue(v_lang.getVocabularyName(), Arrays.asList(values3))); + String[] values4 = {"eng"}; + assertEquals("eng", c.executeSingleValue(v_lang.getVocabularyName(), Arrays.asList(values4))); + String[] values5 = {"ger"}; + assertEquals("deu/ger", c.executeSingleValue(v_lang.getVocabularyName(), Arrays.asList(values5))); + String[] values6 = {"deu/ger"}; + assertEquals("deu/ger", c.executeSingleValue(v_lang.getVocabularyName(), Arrays.asList(values6))); + } +} diff --git a/dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/engine/functions/DateVocabularyTest.java b/dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/engine/functions/DateVocabularyTest.java new file mode 100644 index 0000000..206f084 --- /dev/null +++ b/dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/engine/functions/DateVocabularyTest.java @@ -0,0 +1,73 @@ +package eu.dnetlib.data.collective.transformation.engine.functions; + +import static org.junit.Assert.*; + +import java.util.Arrays; +import java.util.List; + +import org.junit.Before; +import org.junit.Test; + +public class DateVocabularyTest { + + private transient DateVocabulary dateVocab; + + @Before + public void setUp(){ + dateVocab = new DateVocabulary(); + } + + @Test + public void test()throws ProcessingException{ + // '2004' has higher priority than '01.07.1998' in this implementation + String[] date = {"2004", "01.07.1998"}; + assertEquals("2004-01-01", dateVocab.encoding(Arrays.asList(date))); + String[] date2 = {"02-03-2009"}; + assertEquals("2009-03-02", dateVocab.encoding(Arrays.asList(date2))); + String[] date3 = {"02/03/2009"}; + assertEquals("2009-03-02", dateVocab.encoding(Arrays.asList(date3))); + String[] date4 = {"2012-03-28T08:44:17Z"}; + assertEquals("2012-03-28", dateVocab.encoding(Arrays.asList(date4))); + String[] date5 = {"17750/1799"}; // invalid date + assertEquals("", 
dateVocab.encoding(Arrays.asList(date5))); + String[] date6 = {"2002-4-18"}; + assertEquals("2002-04-18", dateVocab.encoding(Arrays.asList(date6))); + String[] date7 = {"2003-12 "}; + assertEquals("2003-12-01", dateVocab.encoding(Arrays.asList(date7))); + + } + + @Test + public void testDefaultPattern() throws ProcessingException{ + String[] date = {"2011", "info:eu-repo/date/embargoEnd/2011-05-12", "2004-03-15", "2009-02-24T13:27:42Z", "2009-02", "2009"}; + // the embargoEnd date is ignored, then the resulting list size must be 5 + assertEquals(5, dateVocab.encoding(Arrays.asList(date), "yyyy-MM-dd", "").size()); + } + + @Test + public void testDefaultPatternWithFilter() throws ProcessingException{ + String[] date = {"2011", "info:eu-repo/date/embargoEnd/2011-05-12", "2004-03-15", "2009-02-24T13:27:42Z", "2009-02", "2009"}; + // the embargoEnd date is ignored, then the oldest date must be '2004-03-15' + List results = dateVocab.encoding(Arrays.asList(date), "yyyy-MM-dd", "min()"); + assertEquals(1, results.size()); + assertEquals("2004-03-15", results.get(0)); + } + + @Test + public void testDefaultPatternWithFilterAndInvalidDate() throws ProcessingException{ + String[] date = {"2011", "2004-03-15", "2009-02-24T13:27:42Z", "2009-02", "2009", "17750/1799"}; + // the invalid date '17750/1799' is ignored, then the oldest date must be '2004-03-15' + List results = dateVocab.encoding(Arrays.asList(date), "yyyy-MM-dd", "min()"); + assertEquals(1, results.size()); + assertEquals("2004-03-15", results.get(0)); + } + + @Test + public void testDefaultPatternWithFilterAndNoValidDate() throws ProcessingException{ + String[] date = {"uuuu"}; + // no input value is a valid date, so the filtered result list must be empty + List results = dateVocab.encoding(Arrays.asList(date), "yyyy-MM-dd", "min()"); + assertEquals(0, results.size()); + } + +} diff --git a/dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/engine/functions/RegularExpressionTest.java
b/dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/engine/functions/RegularExpressionTest.java new file mode 100644 index 0000000..985a268 --- /dev/null +++ b/dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/engine/functions/RegularExpressionTest.java @@ -0,0 +1,74 @@ +package eu.dnetlib.data.collective.transformation.engine.functions; + +import static org.junit.Assert.*; + +import org.apache.oro.text.perl.Perl5Util; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.junit.MockitoJUnitRunner; + +@RunWith(MockitoJUnitRunner.class) +public class RegularExpressionTest { + + private transient RegularExpression regExprFunc; + + @Before + public void setUp(){ + regExprFunc = new RegularExpression(); + } + + @Test + public void testSubstitute() throws ProcessingException{ + + String projId = "241479"; + String regExpr = "s/^(.*info:eu-repo\\/grantAgreement\\/EC\\/FP7\\/)//gm"; + String input = "info:eu-repo/grantAgreement/EC/FP7/241479"; + assertEquals(projId, regExprFunc.executeSingleValue(regExpr, input, "anyValue")); + + } + +// @Test +// public void testExtractGrantNumber() throws ProcessingException{ +// String projectStringCorrect = "info:eu-repo/grantAgreement/EC/FP7/258169/EU/Game and Learning Alliance/GALA"; +// String projectStringIssue = "info:eu-repo/grantAgreement/EC/FP7//EU/A Digital Library Infrastructure on Grid Enabled Technology./DILIGENT"; +// +// String regExpr = "s/^.*info:eu-repo\\/grantAgreement\\/EC\\/FP7\\/(\\d\\d\\d\\d\\d\\d).*/$1/gm"; +//// String regExpr = "s/^(.*info:eu-repo\\/grantAgreement\\/EC\\/FP7\\/)[0-9]6(.*)//gm"; +//// String regExpr = "s/^(.*info:eu-repo\\/grantAgreement\\/EC\\/FP7\\/)[0-9]6(.*)//gm"; +// String grantId = "258169"; +// assertEquals(grantId, regExprFunc.executeSingleValue(regExpr, projectStringCorrect, "anyValue")); +// assertEquals("", regExprFunc.executeSingleValue(regExpr, projectStringIssue, "anyValue")); +// } 
+ + @Test + public void testExtractPMC() throws ProcessingException{ + String input = "PMC: PMC169570"; + String regExpr = "s/^(.*PMC: )//gm"; + String pmcid = "PMC169570"; + assertEquals(pmcid, regExprFunc.executeSingleValue(regExpr, input, "anyValue")); + } + + @Test + public void testExtractDoi() throws ProcessingException{ + String input = "doi: 10.1093/dnares/dsn019"; + String regExpr = "s/^(.*doi: )//gm"; + String doiid = "10.1093/dnares/dsn019"; + assertEquals(doiid, regExprFunc.executeSingleValue(regExpr, input, "anyValue")); + } + + @Test + public void testExtractIssn() throws ProcessingException{ + String issn = "02582279"; +// String regExpr = "s/^(.*issn=[~0-9]*)([^&]*)//gm"; +// String regExpr = "s/^(.*issn=([^&]+).*)?//gm"; + String regExpr = "m/issn=([^&]+)/"; + String input = "http://www.doaj.org/doaj?func=openurl&genre=article&issn=02582279&date=1995&volume=16&issue=2&spage=215"; + assertEquals(issn, regExprFunc.executeSingleValue(regExpr, input, "anyValue")); +// Perl5Util perlRegExpr = new Perl5Util(); +// if (perlRegExpr.match(regExpr, input)) +// System.out.println(perlRegExpr.group(1)); +// else +// System.out.println("no match."); + } +} diff --git a/dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/engine/functions/RetrieveValueTest.java b/dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/engine/functions/RetrieveValueTest.java new file mode 100644 index 0000000..29ebea2 --- /dev/null +++ b/dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/engine/functions/RetrieveValueTest.java @@ -0,0 +1,93 @@ +package eu.dnetlib.data.collective.transformation.engine.functions; + +import static org.junit.Assert.*; +import static org.mockito.Mockito.*; + +import java.util.LinkedHashMap; +import java.util.LinkedList; +import java.util.List; + + +import org.dom4j.DocumentException; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import 
org.mockito.Mock;
+
+import eu.dnetlib.common.profile.Resource;
+import eu.dnetlib.common.profile.ResourceDao;
+import eu.dnetlib.data.collective.transformation.rulelanguage.Argument;
+import eu.dnetlib.data.collective.transformation.rulelanguage.Argument.Type;
+import org.mockito.junit.MockitoJUnitRunner;
+
+@RunWith(MockitoJUnitRunner.class)
+public class RetrieveValueTest {
+
+    private static final String profileId = "profileId-123";
+    private static final String profileIdXpath = "//someXpathProfileId"; // "concat('collection('/db/DRIVER/RepositoryServiceResources')//RESOURCE_PROFILE[.//EXTRA_FIELDS/FIELD[key="NamespacePrefix"][value="', //someXpathProfileId, '"]]')";
+    private static final String xpathExpr = "//someXpath";
+    private static final String profileFieldValue = "someValue";
+    private transient RetrieveValue retrieveValueFunc; // function under test, rebuilt in setUp()
+
+    @Mock
+    private transient ResourceDao resourceDao;
+    @Mock
+    private transient Resource resource;
+
+    @Before
+    public void setUp(){
+        try{
+            //when(resourceDao.getResourceByQuery(profileId)).thenReturn(resource);
+            when(resourceDao.getResource(profileId)).thenReturn(resource);
+        }catch(Exception e){
+            throw new IllegalStateException("unable to stub ResourceDao.getResource for " + profileId, e); // fail fast instead of printing and continuing
+        }
+        when(resource.getValue(xpathExpr)).thenReturn(profileFieldValue);
+        retrieveValueFunc = new RetrieveValue();
+        retrieveValueFunc.setResourceDao(resourceDao);
+    }
+
+    @Test // profile id passed as a literal VALUE argument
+    public void testValueFromProfileByValue()throws ProcessingException, DocumentException{
+        String record = "" + profileId + ""; // NOTE(review): surrounding XML markup looks stripped from this literal -- confirm against the original file
+        List paramList = new LinkedList();
+        paramList.add(new Argument(Type.VALUE, profileId));
+        paramList.add(new Argument(Type.INPUTFIELD, xpathExpr));
+        assertEquals(profileFieldValue, retrieveValueFunc.executeSingleValue(RetrieveValue.FUNCTION.PROFILEFIELD.name(), paramList, record, new LinkedHashMap()));
+    }
+
+    @Test // profile id passed as an INPUTFIELD xpath argument (profileIdXpath)
+    public void testValueFromProfileByXpath()throws ProcessingException, DocumentException{
+        String record = "" + profileId + ""; // NOTE(review): surrounding XML markup looks stripped from this literal -- confirm against the original file
+        List paramList = new LinkedList();
+        paramList.add(new Argument(Type.INPUTFIELD, profileIdXpath));
+        paramList.add(new Argument(Type.INPUTFIELD, xpathExpr));
+        assertEquals(profileFieldValue, retrieveValueFunc.executeSingleValue(RetrieveValue.FUNCTION.PROFILEFIELD.name(), paramList, record, new LinkedHashMap()));
+    }
+
+    @Test(expected = IllegalArgumentException.class)
+    public void testInvalidFunctionName()throws ProcessingException, DocumentException{
+        List paramList = new LinkedList();
+        paramList.add(new Argument(Type.VALUE, profileId));
+        paramList.add(new Argument(Type.INPUTFIELD, xpathExpr));
+        assertEquals(profileFieldValue, retrieveValueFunc.executeSingleValue("someInvalidFunctionName", paramList, "", new LinkedHashMap()));
+    }
+
+    @Test(expected = ProcessingException.class)
+    public void testInvalidProfileId()throws ProcessingException, DocumentException{
+        String profileId = "invalidProfileId";
+        String xpathExpr = "//someExpr";
+
+        try {
+            when(resourceDao.getResource(profileId)).thenReturn(null);
+        } catch (Exception e) {
+            // a stubbing failure is a test-setup error, not the condition under test
+            throw new IllegalStateException("unable to stub ResourceDao.getResource for " + profileId, e);
+        }
+
+        List paramList = new LinkedList();
+        paramList.add(new Argument(Type.VALUE, profileId));
+        paramList.add(new Argument(Type.INPUTFIELD, xpathExpr));
+        assertEquals(profileFieldValue, retrieveValueFunc.executeSingleValue(RetrieveValue.FUNCTION.PROFILEFIELD.name(), paramList, "", new LinkedHashMap()));
+    }
+}
diff --git a/dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/engine/functions/SplitTest.java b/dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/engine/functions/SplitTest.java
new file mode 100644
index 0000000..e60ca01
--- /dev/null
+++ b/dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/engine/functions/SplitTest.java
@@ -0,0 +1,35 @@
+/**
+ *
+ */
+package eu.dnetlib.data.collective.transformation.engine.functions;
+
+import static org.junit.Assert.*;
+
+import java.util.Arrays;
+
+import org.junit.Before;
+import org.junit.Test;
+import
org.junit.runner.RunWith;
+import org.mockito.junit.MockitoJUnitRunner;
+
+/**
+ * Checks that Split hands out the tokens of a delimited value one call at a time, then null.
+ * @author js
+ */
+@RunWith(MockitoJUnitRunner.class)
+public class SplitTest {
+    private transient Split splitFunc;
+
+    @Before
+    public void setUp(){
+        splitFunc = new Split();
+    }
+
+    @Test
+    public void testSplit() throws ProcessingException{
+        String[] authors = {"Simon, Bolivar ; Müller, Heiner"};
+        assertEquals("Simon, Bolivar", splitFunc.executeSingleValue(Arrays.asList(authors), ";", "idxyz"));
+        assertEquals("Müller, Heiner", splitFunc.executeSingleValue("idxyz"));
+        assertNull(splitFunc.executeSingleValue("idxyz")); // no tokens left for this id
+    }
+}
diff --git a/dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/rulelanguage/RuleLanguageTest.java b/dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/rulelanguage/RuleLanguageTest.java
new file mode 100644
index 0000000..cdee98d
--- /dev/null
+++ b/dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/rulelanguage/RuleLanguageTest.java
@@ -0,0 +1,558 @@
+package eu.dnetlib.data.collective.transformation.rulelanguage;
+
+import static org.junit.Assert.*;
+
+import java.io.StringReader;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+
+import org.junit.Test;
+import org.svenson.JSONParser;
+
+import eu.dnetlib.data.collective.transformation.engine.functions.IdentifierExtract;
+import eu.dnetlib.data.collective.transformation.engine.functions.Lookup;
+import eu.dnetlib.data.collective.transformation.engine.functions.RegularExpression;
+import eu.dnetlib.data.collective.transformation.rulelanguage.Argument.Type;
+import eu.dnetlib.data.collective.transformation.rulelanguage.parser.ASTMyScript.SCRIPTTYPE;
+import eu.dnetlib.data.collective.transformation.rulelanguage.visitor.RuleLanguageVisitor;
+
+
+public class RuleLanguageTest {
+
+    private static final String declareScript = "declare_script \"abc\";\r\n";
+    private static final String endScript =
"end"; + private StringBuilder scriptBuilder; + + private RuleLanguageVisitor parseValid(String stmt) { + RuleLanguageParser rlp = new RuleLanguageParser(); + StringReader r = new StringReader(stmt); + rlp.parse(r); + return rlp.getVisitor(); + } + + @Test + public void parseMinimalScript(){ + scriptBuilder = new StringBuilder(); + scriptBuilder.append(declareScript); + scriptBuilder.append(endScript); + RuleLanguageVisitor v = parseValid(scriptBuilder.toString()); + assertEquals("abc", v.getScriptName()); + assertEquals(SCRIPTTYPE.MAINSCRIPT, v.getScriptType()); + } + + @Test + public void parseNameSpace(){ + final String ns_decl = "declare_ns somePrefix = \"http://someHost/somePath/someVersion/1.0/\";\r\n"; + scriptBuilder = new StringBuilder(); + scriptBuilder.append(declareScript); + scriptBuilder.append(ns_decl); + scriptBuilder.append(endScript); + RuleLanguageVisitor v = parseValid(scriptBuilder.toString()); + assertEquals("http://someHost/somePath/someVersion/1.0/", v.getNamespaceDeclarations().get("somePrefix")); + } + + @Test + public void parsePreprocessing(){ + final String preproc = "preprocess abc = dblookup(\"select * from xyz\");\r\n"; + scriptBuilder = new StringBuilder(); + scriptBuilder.append(declareScript); + scriptBuilder.append(preproc); + scriptBuilder.append(endScript); + RuleLanguageVisitor v = parseValid(scriptBuilder.toString()); + assertTrue(v.getPreprocessings().get(0).containsKey("dblookup")); + } + + @Test + public void parsePreprocessingBlacklist(){ + final String preproc = "blacklist(\"//RESOURCE_IDENTIFIER/@value\");\r\n"; + scriptBuilder = new StringBuilder(); + scriptBuilder.append(declareScript); + scriptBuilder.append(preproc); + scriptBuilder.append(endScript); + RuleLanguageVisitor v = parseValid(scriptBuilder.toString()); + assertTrue(v.getPreprocessings().get(0).containsKey("blacklist")); + assertEquals("//RESOURCE_IDENTIFIER/@value", v.getPreprocessings().get(0).get("blacklist")); + Argument argXpath = new 
Argument(Type.INPUTFIELD, v.getPreprocessings().get(0).get("blacklist")); + assertEquals("//RESOURCE_IDENTIFIER/@value", argXpath.getArgument()); + } + + @Test + public void testLookup(){ + final String rule = "dc:relation = lookup(xpath:\"//proc:provenance\", \"name\");\r\n"; + scriptBuilder = new StringBuilder(); + scriptBuilder.append(declareScript); + scriptBuilder.append(rule); + scriptBuilder.append(endScript); + RuleLanguageVisitor v = parseValid(scriptBuilder.toString()); + Set s = v.getElementMappingRules().get("dc:relation"); + assertNotNull(s); + assertEquals(1, s.size()); + assertEquals("lookup", (s.toArray(new Rules[0]))[0].getFunctionCall().getExternalFunctionName()); + assertEquals("//proc:provenance", (s.toArray(new Rules[0]))[0].getFunctionCall().getParameters().get(Lookup.paramExprIdentifier)); + assertEquals("name", (s.toArray(new Rules[0]))[0].getFunctionCall().getParameters().get(Lookup.paramExprProperty)); + } + + @Test + public void testLookupVar(){ +// final String rule = "$var = "; + final String rule = "dc:relation = lookup($var0, \"name\");\r\n"; + scriptBuilder = new StringBuilder(); + scriptBuilder.append(declareScript); + scriptBuilder.append(rule); + scriptBuilder.append(endScript); + RuleLanguageVisitor v = parseValid(scriptBuilder.toString()); + Set s = v.getElementMappingRules().get("dc:relation"); + assertNotNull(s); + assertEquals(1, s.size()); + assertEquals("lookup", (s.toArray(new Rules[0]))[0].getFunctionCall().getExternalFunctionName()); + assertEquals("$var0", (s.toArray(new Rules[0]))[0].getFunctionCall().getParameters().get(Lookup.paramExprIdentifier)); + assertEquals("name", (s.toArray(new Rules[0]))[0].getFunctionCall().getParameters().get(Lookup.paramExprProperty)); + } + + @Test + public void parseAssignXpathRule(){ + final String ruleAssignWithXpathExpr = "dri:title = xpath:\"//dc:title\";\r\n"; + final String comment = "// some comment\r\n"; + scriptBuilder = new StringBuilder(); + 
scriptBuilder.append(declareScript); + scriptBuilder.append(comment); + scriptBuilder.append(ruleAssignWithXpathExpr); + scriptBuilder.append(endScript); + RuleLanguageVisitor v = parseValid(scriptBuilder.toString()); + Set s = v.getElementMappingRules().get("dri:title"); + assertNotNull(s); + assertEquals(1, s.size()); + assertEquals("//dc:title", (s.toArray(new Rules[0]))[0].getXpath()); + } + + @Test + public void parseConstantRule(){ + final String ruleConstant = "dri:CobjMDformats = \"oai_dc\";\r\n"; + scriptBuilder = new StringBuilder(); + scriptBuilder.append(declareScript); + scriptBuilder.append(ruleConstant); + scriptBuilder.append(endScript); + RuleLanguageVisitor v = parseValid(scriptBuilder.toString()); + assertEquals(1, v.getElementMappingRules().size()); + Set s = v.getElementMappingRules().get("dri:CobjMDformats"); + for (IRule r: s){ + assertEquals("oai_dc", ((Rules)r).getConstant()); + } + } + + @Test + public void parseFuncGetValueRule(){ + final String ruleFuncGetvalue = "static dri:repositoryName = getValue(profileField,[$job.recordprefix, def]);\r\n"; + scriptBuilder = new StringBuilder(); + scriptBuilder.append(declareScript); + scriptBuilder.append(ruleFuncGetvalue); + scriptBuilder.append(endScript); + RuleLanguageVisitor v = parseValid(scriptBuilder.toString()); + assertEquals(1, v.getElementMappingRules().size()); + Set s = v.getElementMappingRules().get("dri:repositoryName"); + for (IRule r: s){ + assertEquals("getValue", ((Rules)r).getFunctionCall().getExternalFunctionName()); + //assertEquals("dnetExt:getValue(profileField)", r.getFunction()); + assertEquals(2, ((Rules)r).getFunctionCall().getArguments().size()); + assertEquals("$job.recordprefix", ((Rules)r).getFunctionCall().getArguments().get(0).getArgument()); + assertEquals(true, ((Rules)r).isStatic()); + } + } + + @Test + public void parseFuncGetValueXpathArgsRule(){ + final String ruleFuncGetvalue = "dri:repositoryName = getValue(profileField,[xpath:\"//node1\", 
xpath:\"//node2\"]);\r\n"; + scriptBuilder = new StringBuilder(); + scriptBuilder.append(declareScript); + scriptBuilder.append(ruleFuncGetvalue); + scriptBuilder.append(endScript); + RuleLanguageVisitor v = parseValid(scriptBuilder.toString()); + assertEquals(1, v.getElementMappingRules().size()); + Set s = v.getElementMappingRules().get("dri:repositoryName"); + for (IRule r: s){ + assertEquals("getValue", ((Rules)r).getFunctionCall().getExternalFunctionName()); + //assertEquals("dnetExt:getValue(profileField)", r.getFunction()); + assertEquals(2, ((Rules)r).getFunctionCall().getArguments().size()); + assertEquals(false, ((Rules)r).isStatic()); + Argument arg1 = ((Rules)r).getFunctionCall().getArguments().get(0); + assertTrue(arg1.isInputField()); + assertEquals("//node1", arg1.getArgument()); + Argument arg2 = ((Rules)r).getFunctionCall().getArguments().get(1); + assertTrue(arg2.isInputField()); + assertEquals("//node2", arg2.getArgument()); + } + } + + @Test + public void parseFuncConvert(){ + final String ruleFuncConvert_withPrefixes = "somePrefix:lv2 = Convert(xpath:\"/dc:metadata/dc:language\",LangVocab);\r\n"; + scriptBuilder = new StringBuilder(); + scriptBuilder.append(declareScript); + scriptBuilder.append(ruleFuncConvert_withPrefixes); + scriptBuilder.append(endScript); + RuleLanguageVisitor v = parseValid(scriptBuilder.toString()); + assertEquals(1, v.getElementMappingRules().size()); + Set s = v.getElementMappingRules().get("somePrefix:lv2"); + for (IRule r: s){ + assertEquals("convert", ((Rules)r).getFunctionCall().getExternalFunctionName()); + //assertEquals("dnetExt:convert(/metadata/language,LangVocab)", r.getFunction()); + } + } + + @Test + public void parseFuncExtract(){ + final String ruleFuncExtract = "somePrefix:lv = Extract(Language);\r\n"; + scriptBuilder = new StringBuilder(); + scriptBuilder.append(declareScript); + scriptBuilder.append(ruleFuncExtract); + scriptBuilder.append(endScript); + RuleLanguageVisitor v = 
parseValid(scriptBuilder.toString()); + assertEquals(1, v.getElementMappingRules().size()); + Set s = v.getElementMappingRules().get("somePrefix:lv"); + for (IRule r: s){ + assertEquals("extract", ((Rules)r).getFunctionCall().getExternalFunctionName()); + assertEquals("Language", ((Rules)r).getFunctionCall().getParameters().get("feature")); + } + } + + @Test + public void parseFuncConcat(){ + final String ruleFuncConcat = "sp:lv = concat(\"value abc\", $var1);\r\n"; + scriptBuilder = new StringBuilder(); + scriptBuilder.append(declareScript); + scriptBuilder.append(ruleFuncConcat); + scriptBuilder.append(endScript); + RuleLanguageVisitor v = parseValid(scriptBuilder.toString()); + assertEquals(1, v.getElementMappingRules().size()); + Set s = v.getElementMappingRules().get("sp:lv"); + for (IRule r: s){ + assertEquals("concat", ((Rules)r).getFunctionCall().getExternalFunctionName()); + assertEquals(2, ((Rules)r).getFunctionCall().getParamList().size()); + } + } + + @Test + public void parseFuncSplit(){ + final String ruleFuncSplit = "%template1 = split(xpath:\"/dc:creator\", \"dc:creator\", \";\");\r\n"; + scriptBuilder = new StringBuilder(); + scriptBuilder.append(declareScript); + scriptBuilder.append(ruleFuncSplit); + scriptBuilder.append(endScript); + RuleLanguageVisitor v = parseValid(scriptBuilder.toString()); + assertEquals(1, v.getTemplateMappingRules().size()); + IRule r = v.getTemplateMappingRules().get("%template1"); + assertEquals("split", ((Rules)r).getFunctionCall().getExternalFunctionName()); + + } + + @Test + public void parseFuncConvertWithAttr(){ + final String ruleFuncConvert_withAttribute = "somePrefix:lv3 = Convert(xpath:\"/dc:metadata/language/@attr\",LangVocab);\r\n"; + scriptBuilder = new StringBuilder(); + scriptBuilder.append(declareScript); + scriptBuilder.append(ruleFuncConvert_withAttribute); + scriptBuilder.append(endScript); + RuleLanguageVisitor v = parseValid(scriptBuilder.toString()); + assertEquals(1, 
v.getElementMappingRules().size()); + Set s = v.getElementMappingRules().get("somePrefix:lv3"); + for (IRule r: s){ + assertEquals("convert", ((Rules)r).getFunctionCall().getExternalFunctionName()); + //assertEquals("dnetExt:convert(/dc:metadata/language[@attr],LangVocab)", r.getFunction()); + } + } + + @Test + public void parseFuncConvertWithOptParams(){ + final String ruleFuncConvert_withAttribute = "somePrefix:lv3 = Convert(xpath:\"/dc:metadata/dc:date\",DateISO, \"YYYY-MM-DD\", \"MIN\" );\r\n"; + scriptBuilder = new StringBuilder(); + scriptBuilder.append(declareScript); + scriptBuilder.append(ruleFuncConvert_withAttribute); + scriptBuilder.append(endScript); + RuleLanguageVisitor v = parseValid(scriptBuilder.toString()); + assertEquals(1, v.getElementMappingRules().size()); + Set s = v.getElementMappingRules().get("somePrefix:lv3"); + for (IRule r: s){ + assertEquals("convert", ((Rules)r).getFunctionCall().getExternalFunctionName()); + //assertEquals("dnetExt:convert(/dc:metadata/language[@attr],LangVocab)", r.getFunction()); + } + } + + @Test + public void parseFuncIdentifierExtract(){ + final String ruleFuncIdentiferExtract = "pref:elem = identifierExtract('[\"dc:identifer\", \"dc:relation\"]', xpath:\"/\", 'regExpr');\r\n"; + scriptBuilder = new StringBuilder(); + scriptBuilder.append(declareScript); + scriptBuilder.append(ruleFuncIdentiferExtract); + scriptBuilder.append(endScript); + RuleLanguageVisitor v = parseValid(scriptBuilder.toString()); + assertEquals(1, v.getElementMappingRules().size()); + Set s = v.getElementMappingRules().get("pref:elem"); + for (IRule r: s){ + assertEquals("identifierExtract", ((Rules)r).getFunctionCall().getExternalFunctionName()); + String xpathJsonString = ((Rules)r).getFunctionCall().getParameters().get(IdentifierExtract.paramXpathExprJson); + assertEquals("[\"dc:identifer\", \"dc:relation\"]", xpathJsonString); + List xpathExprList = JSONParser.defaultJSONParser().parse(List.class, xpathJsonString); + assertEquals(2, 
xpathExprList.size());
+        }
+    }
+
+    @Test // if/else rule whose branches write to two different output elements
+    public void parseConditionalRuleWithDistinctTargetElements(){
+        final String ruleConditional = "if xpath:\"//dc:metadata/dc:language\" somePrefix:a = \"primaryRule\"; else somePrefix:b = \"secondaryRule\";\r\n";
+        scriptBuilder = new StringBuilder();
+        scriptBuilder.append(declareScript);
+        scriptBuilder.append(ruleConditional);
+        scriptBuilder.append(endScript);
+        RuleLanguageVisitor v = parseValid(scriptBuilder.toString());
+        assertEquals(2, v.getElementMappingRules().size());
+        Set s1 = v.getElementMappingRules().get("somePrefix:a");
+        assertEquals(1, s1.size());
+        Set s2 = v.getElementMappingRules().get("somePrefix:b");
+        assertEquals(1, s2.size());
+        for (IRule rCon: s1){
+            Rules r = (Rules)rCon;
+            assertEquals("//dc:metadata/dc:language", r.getCondition().getConditionExpression());
+            assertTrue(r.hasCondition());
+            assertTrue(r.getCondition().isPrimary(r));
+        }
+        for (IRule rCon: s2){
+            Rules r = (Rules)rCon;
+            assertEquals("//dc:metadata/dc:language", r.getCondition().getConditionExpression());
+            assertTrue(r.hasCondition());
+            if (r.getCondition().isPrimary(r)){
+                assertNotSame(r.getCondition().getSecondaryRule(), r);
+                assertEquals(r.getCondition().getPrimaryRule(), r);
+            }
+            assertFalse(r.getCondition().getPrimaryRule().getUniqueName().equals(r.getCondition().getSecondaryRule().getUniqueName())); // compare values; assertNotSame only compared references
+        }
+    }
+
+    @Test
+    public void parseConditionalRuleWithParamAsConditionExpression(){
+        final String ruleConditional = "if xpath:\"$var1='Aggregator'\" somePrefix:a = \"primaryRule\"; else somePrefix:b = \"secondaryRule\";\r\n";
+        scriptBuilder = new StringBuilder();
+        scriptBuilder.append(declareScript);
+        scriptBuilder.append(ruleConditional);
+        scriptBuilder.append(endScript);
+        RuleLanguageVisitor v = parseValid(scriptBuilder.toString());
+        assertEquals(2, v.getElementMappingRules().size());
+        Set s1 = v.getElementMappingRules().get("somePrefix:a");
+        assertEquals(1, s1.size());
+        Set s2 =
v.getElementMappingRules().get("somePrefix:b");
+        assertEquals(1, s2.size());
+        for (IRule rCon: s1){
+            Rules r = (Rules)rCon;
+            assertEquals("$var1='Aggregator'", r.getCondition().getConditionExpression());
+            assertTrue(r.hasCondition());
+            assertTrue(r.getCondition().isPrimary(r));
+        }
+        for (IRule rCon: s2){
+            Rules r = (Rules)rCon;
+            assertEquals("$var1='Aggregator'", r.getCondition().getConditionExpression());
+            assertTrue(r.hasCondition());
+            if (r.getCondition().isPrimary(r)){
+                assertNotSame(r.getCondition().getSecondaryRule(), r);
+                assertEquals(r.getCondition().getPrimaryRule(), r);
+            }
+            assertFalse(r.getCondition().getPrimaryRule().getUniqueName().equals(r.getCondition().getSecondaryRule().getUniqueName())); // compare values; assertNotSame only compared references
+        }
+    }
+
+    @Test // if/else rule whose branches both write to the same output element
+    public void parseConditionalRuleWithSameTargetElements(){
+        final String ruleConditional = "if xpath:\"//dc:metadata/dc:language\" somePrefix:a = \"primaryRule\"; else somePrefix:a = \"secondaryRule\";\r\n";
+        scriptBuilder = new StringBuilder();
+        scriptBuilder.append(declareScript);
+        scriptBuilder.append(ruleConditional);
+        scriptBuilder.append(endScript);
+        RuleLanguageVisitor v = parseValid(scriptBuilder.toString());
+        assertEquals(1, v.getElementMappingRules().size());
+        // the output element 'somePrefix:a' has two alternative rules
+        Set s = v.getElementMappingRules().get("somePrefix:a");
+        assertEquals(2, s.size());
+        for (IRule rCon: s){
+            Rules r = (Rules)rCon;
+            assertEquals("//dc:metadata/dc:language", r.getCondition().getConditionExpression());
+            if (r.getCondition().isPrimary(r)){
+                assertEquals(r.getCondition().getPrimaryRule(), r);
+            }else{
+                assertEquals(r.getCondition().getSecondaryRule(), r);
+            }
+            assertNotSame(r.getCondition().getPrimaryRule(), r.getCondition().getSecondaryRule()); // two distinct rule objects ...
+            assertEquals(r.getCondition().getPrimaryRule().getUniqueName(), r.getCondition().getSecondaryRule().getUniqueName()); // ... that share one unique name
+        }
+    }
+
+    @Test
+    public void parseConditionalRuleWithApplyOnField(){
+        final String rule = "apply
xpath:\"//dc:identifier\" if xpath:\"starts-with(., 'http:')\" dr:CobjIdentifier = xpath:\".\"; else dc:identifier = xpath:\".\";\r\n"; + scriptBuilder = new StringBuilder(); + scriptBuilder.append(declareScript); + scriptBuilder.append(rule); + scriptBuilder.append(endScript); + RuleLanguageVisitor v = parseValid(scriptBuilder.toString()); + assertEquals(2, v.getElementMappingRules().size()); + Set s1 = v.getElementMappingRules().get("dr:CobjIdentifier"); + for (IRule rCon: s1){ + Rules r = (Rules)rCon; + assertEquals("//dc:identifier", r.getCondition().getApplyExpression()); + } + } + + @Test + public void testCopy(){ + final String rule = "dc:title = copy(\"dc:title\", \"//dc:title\", \"@*|node()\");\r\n"; + scriptBuilder = new StringBuilder(); + scriptBuilder.append(declareScript); + scriptBuilder.append(rule); + scriptBuilder.append(endScript); + RuleLanguageVisitor v = parseValid(scriptBuilder.toString()); + Set s = v.getElementMappingRules().get("dc:title"); + assertNotNull(s); + assertEquals("dc:title", (s.toArray(new Rules[0]))[0].getTemplateMatch()); + } + + @Test + public void testSkip(){ + final String rule = "dc:title = skipRecord();\r\n"; + scriptBuilder = new StringBuilder(); + scriptBuilder.append(declareScript); + scriptBuilder.append(rule); + scriptBuilder.append(endScript); + + RuleLanguageVisitor v = parseValid(scriptBuilder.toString()); + assertNotNull(v); + assertEquals(1, v.getElementMappingRules().size()); + Set s1 = v.getElementMappingRules().get("dc:title"); + assertNotNull(s1); + for (IRule rSkip: s1){ + Rules r = (Rules)rSkip; + assertNotNull(r); + assertFalse(r.hasSet()); + assertTrue(r.isSkip()); + } + + } + + @Test + public void parseSetRule(){ + final String rule = "dc:identifier = set(\"CONST\" , @type = \"ABC\"; , @lang = \"ger\";);"; + scriptBuilder = new StringBuilder(); + scriptBuilder.append(declareScript); + scriptBuilder.append(rule); + scriptBuilder.append(endScript); + RuleLanguageVisitor v = 
parseValid(scriptBuilder.toString());
+        assertEquals(1, v.getElementMappingRules().size());
+        Iterator it = v.getElementMappingRules().keySet().iterator();
+        while (it.hasNext()){
+            assertNotNull("mapping key must not be null", it.next()); // replaces a debug println to stdout
+        }
+        Set s1 = v.getElementMappingRules().get("dc:identifier");
+        for (IRule rSet: s1){
+            Rules r = (Rules)rSet;
+            assertTrue(r.hasSet());
+            List pendingRules = r.getRulesSet().getPendingRules();
+            assertEquals(2,pendingRules.size());
+            for (Rules pRule: pendingRules){
+                assertTrue(pRule.getAttribute().equals("type") ||
+                        pRule.getAttribute().equals("lang"));
+            }
+
+        }
+    }
+
+    @Test // a "$name = ..." rule is registered as a variable mapping, not an element mapping
+    public void parseRuleWithVariable(){
+        final String ruleLine = "$abc = xpath:\"//dc:creator\";\r\n";
+        scriptBuilder = new StringBuilder();
+        scriptBuilder.append(declareScript);
+        scriptBuilder.append(ruleLine);
+        scriptBuilder.append(endScript);
+        RuleLanguageVisitor v = parseValid(scriptBuilder.toString());
+        assertEquals(0, v.getElementMappingRules().size());
+        assertEquals(1, v.getVariableMappingRules().size());
+    }
+
+    @Test // the "static" keyword must be carried over to the variable rule
+    public void parseRuleWithStaticVariable(){
+        final String ruleLine = "static $abc = RegExpr(xpath:\"//someExpr1\", xpath:\"//someExpr2\", \"s/[x1]/[x2]\");\r\n";
+        scriptBuilder = new StringBuilder();
+        scriptBuilder.append(declareScript);
+        scriptBuilder.append(ruleLine);
+        scriptBuilder.append(endScript);
+        RuleLanguageVisitor v = parseValid(scriptBuilder.toString());
+        assertEquals(0, v.getElementMappingRules().size());
+        assertEquals(1, v.getVariableMappingRules().size());
+        assertTrue(((Rules)v.getVariableMappingRules().get("$abc")).isStatic());
+    }
+
+//  @Test  NOTE(review): annotation is commented out, so parseRegExpr never runs -- enable or delete
+    public void parseRegExpr(){
+        final String ruleLine = "somePrefix:someElement = RegExpr(xpath:\"//someExpr1\", xpath:\"//someExpr2\", \"s/[x1]/[x2]\");\r\n";
+        scriptBuilder = new StringBuilder();
+        scriptBuilder.append(declareScript);
+        scriptBuilder.append(ruleLine);
+        scriptBuilder.append(endScript);
+        @SuppressWarnings("unused")
+        RuleLanguageVisitor v =
parseValid(scriptBuilder.toString()); + } + + @Test + public void parseRegExpr2(){ + final String ruleLine = "somePrefix:someElement = RegExpr($job.recordidentifier, xpath:\"//someExpr2\", \"s/[x1]/[x2]\");\r\n"; + scriptBuilder = new StringBuilder(); + scriptBuilder.append(declareScript); + scriptBuilder.append(ruleLine); + scriptBuilder.append(endScript); + RuleLanguageVisitor v = parseValid(scriptBuilder.toString()); + assertEquals(1, v.getElementMappingRules().size()); + Set s = v.getElementMappingRules().get("somePrefix:someElement"); + for (IRule r: s){ + assertEquals("regExpr", ((Rules)r).getFunctionCall().getExternalFunctionName()); + //assertEquals("dnetExt:getValue(profileField)", r.getFunction()); + assertEquals("$job.recordidentifier", ((Rules)r).getFunctionCall().getParameters().get(RegularExpression.paramExpr1)); +// assertEquals("$job.recordprefix", ((Rules)r).getFunctionCall().getArguments().get(0).getArgument()); +// assertEquals(true, ((Rules)r).isStatic()); + } + } + + @Test + public void parseDc2DmfScript(){ + scriptBuilder = new StringBuilder(); + scriptBuilder.append("declare_script \"MainSample\";\r\n"); + scriptBuilder.append("declare_ns dr = \"http://www.driver-repository.eu/namespace/dr\";\r\n"); + scriptBuilder.append("declare_ns dri = \"http://www.driver-repository.eu/namespace/dri\";\r\n"); + scriptBuilder.append("declare_ns dc = \"http://purl.org/dc/elements/1.1/\";\r\n"); + + scriptBuilder.append("dr:objectIdentifier = xpath:\"//dri:objIdentifier\";\r\n"); + scriptBuilder.append("dr:dateOfCollection = getValue(CURRENTDATE, []);\r\n"); + scriptBuilder.append("dr:CobjContentSynthesis = empty;\r\n"); + scriptBuilder.append("dr:CobjTypology = \"Textual\";\r\n"); + scriptBuilder.append("dr:CobjModel = \"OAI\";\r\n"); + scriptBuilder.append("dr:CobjMdFormats = \"oai_dc\";\r\n"); + scriptBuilder.append("dr:CobjDescriptionSynthesis = empty;\r\n"); + scriptBuilder.append("//dr:aggregatorName = getValue(PROFILEFIELD, 
[\"transformationmanager-service-profile-id\", xpath:\"//PROPERTY/@key='name'\"]);\r\n"); + scriptBuilder.append("dr:aggregatorInstitution = empty;\r\n"); + scriptBuilder.append("dr:repositoryName = getValue(PROFILEFIELD, [xpath:\"//dri:repositoryId\", xpath:\"//CONFIGURATION/OFFICIAL_NAME\"]);\r\n"); + scriptBuilder.append("dr:repositoryLink = getValue(PROFILEFIELD, [xpath:\"//dri:repositoryId\", xpath:\"//REPOSITORY_WEBPAGE\"]);\r\n"); + scriptBuilder.append("dr:repositoryCountry = getValue(PROFILEFIELD, [xpath:\"//dri:repositoryId\", xpath:\"//COUNTRY\"]);\r\n"); + scriptBuilder.append("dr:repositoryInstitution = getValue(PROFILEFIELD, [xpath:\"//dri:repositoryId\", xpath:\"//REPOSITORY_INSTITUTION\"]);\r\n"); + scriptBuilder.append("dc:creator = xpath:\"//dc:creator\";\r\n"); + scriptBuilder.append("dc:title = xpath:\"//dc:title\";\r\n"); + scriptBuilder.append("dc:subject = xpath:\"//dc:subject\";\r\n"); +// scriptBuilder.append("dr:CobjCategory = Convert(xpath:\"//dc:type\", TextTypologies);\r\n"); +// scriptBuilder.append("dc:language = Convert(xpath:\"//dc:language\", Languages);\r\n"); +// scriptBuilder.append("dc:dateAccepted = Convert(xpath:\"//dc:date\", DateISO8601);\r\n"); + scriptBuilder.append("apply xpath:\"//dc:identifier\" if xpath:\"starts-with(., 'http')\" dc:identifier = xpath:\".\"; else dr:CobjIdentifier = xpath:\".\";\r\n"); + scriptBuilder.append("apply xpath:\"//dc:relation\" if xpath:\"starts-with(., 'http')\" dc:identifier = Convert(xpath:\"//dc:type\", TextTypologies); else dr:CobjIdentifier = xpath:\".\";\r\n"); + scriptBuilder.append("dc:publisher = xpath:\"//dc:publisher\";\r\n"); + scriptBuilder.append("dc:source = xpath:\"//dc:source\";\r\n"); + scriptBuilder.append("dc:contributor = xpath:\"//dc:contributor\";\r\n"); + scriptBuilder.append("dc:relation = xpath:\"//dc:relation\";\r\n"); + scriptBuilder.append("dc:description = xpath:\"//dc:description\";\r\n"); + scriptBuilder.append("end\r\n"); + @SuppressWarnings("unused") + 
RuleLanguageVisitor v = parseValid(scriptBuilder.toString()); + } +} diff --git a/dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/rulelanguage/TransformationTest.java b/dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/rulelanguage/TransformationTest.java new file mode 100644 index 0000000..c823b7b --- /dev/null +++ b/dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/rulelanguage/TransformationTest.java @@ -0,0 +1,118 @@ +package eu.dnetlib.data.collective.transformation.rulelanguage; + +import static org.junit.Assert.assertEquals; + +import java.io.StringReader; +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeMap; + +import org.junit.Test; +import org.springframework.core.io.ClassPathResource; + +import eu.dnetlib.data.collective.transformation.core.xsl.ext.TransformationFunctionProxy; +import eu.dnetlib.data.collective.transformation.engine.SimpleTransformationEngine; +import eu.dnetlib.data.collective.transformation.rulelanguage.util.Converter; +import eu.dnetlib.data.collective.transformation.rulelanguage.util.FunctionCall; + + +public class TransformationTest { + + + private static final String declareScript = "declare_script abc;\r\n"; + private static final String ruleTypology = "CobjTypology = Text;\r\n"; + private static final String ruleFormat = "CobjMDformats = oai_dc;\r\n"; + private static final String ruleFuncGetvalue = "repositoryName = getValue(profileField,[abc, def]);\r\n"; + private static final String ruleFuncConvert_withoutPrefixes = "lv1 = Convert(xpath:/metadata/language,LangVocab);\r\n"; + private static final String ruleFuncConvert_withPrefixes = "lv2 = Convert(xpath:/dc:metadata/dc:language,LangVocab);\r\n"; + private static final String ruleFuncConvert_withAttribute = "lv3 = 
Convert(xpath:/dc:metadata/language[@attr],LangVocab);\r\n"; + private static final String endScript = "end"; + private StringBuilder scriptBuilder; + private StringBuilder recordBuilder; + + + private String buildSourceRecord(String lang){ + recordBuilder = new StringBuilder(); + recordBuilder.append("\r\n"); + recordBuilder.append("\r\n"); + recordBuilder.append("
\r\n"); + recordBuilder.append("\r\n"); + recordBuilder.append("" + lang + " \r\n"); + recordBuilder.append("\r\n"); + recordBuilder.append("\r\n"); + recordBuilder.append("\r\n"); + return recordBuilder.toString(); + } + + private List getSourceRecords(){ + List records = new LinkedList(); + records.add(buildSourceRecord("ger")); + records.add(buildSourceRecord("eng")); + return records; + } + +// private String getStylesheet(String stmt) { +// RuleLanguageParser rlp = new RuleLanguageParser(); +// StringReader r = new StringReader(stmt); +// rlp.parse(r); +// return rlp.getStyleSheet(); +// } +// +// //@Test +// public void parseMinimalScript(){ +// scriptBuilder = new StringBuilder(); +// scriptBuilder.append(declareScript); +// scriptBuilder.append(ruleFuncConvert_withoutPrefixes); +// scriptBuilder.append(endScript); +// System.out.println(getStylesheet(scriptBuilder.toString())); +// } +/* + @Test + public void testTransformationEngine(){ + TreeMap> ruleMapping = new TreeMap>(); + Set ruleSet = new HashSet(); + + Map paramMap = new LinkedHashMap(); + paramMap.put("expr1", Converter.getXpathFromXpathExpr("/metadata/language")); + paramMap.put("vocabulary", "LangVocab"); + FunctionCall fc = new FunctionCall(); + fc.setExternalFunctionName("convert"); + fc.setParameters(paramMap); + Rules r = new Rules(); + r.setFunctionCall(fc); + ruleSet.add(r); + ruleMapping.put("language", ruleSet); + r = new Rules(); + r.setConstant("Text"); + ruleSet = new HashSet(); + ruleSet.add(r); + ruleMapping.put("CobjTypology", ruleSet); + + TransformationPrototype t = new TransformationPrototype(); + t.setTemplate(new ClassPathResource("/eu/dnetlib/data/collective/transformation/engine/template.xsl")); + t.init(); + t.setMapping(ruleMapping); + t.configureTransformation(); + + List records = getSourceRecords(); + SimpleTransformationEngine engine = new SimpleTransformationEngine(); + engine.setObjectRecords(records); + engine.setTransformation(t); + engine.transform(); + List 
mdRecords = engine.getMdRecords(); + assertEquals(2, mdRecords.size()); + for (String record: mdRecords){ + System.out.println(record); + } + + } +*/ + @Test + public void testTransformationFunctions(){ + + } +} diff --git a/dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/utils/TransformationRulesImportToolTest.java b/dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/utils/TransformationRulesImportToolTest.java new file mode 100644 index 0000000..1942920 --- /dev/null +++ b/dnet-data-services/src/test/java/eu/dnetlib/data/collective/transformation/utils/TransformationRulesImportToolTest.java @@ -0,0 +1,119 @@ +package eu.dnetlib.data.collective.transformation.utils; + +import static org.junit.Assert.*; +import static org.mockito.Mockito.*; + +import java.util.LinkedList; +import java.util.List; + +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Mock; +import org.mockito.junit.MockitoJUnitRunner; + +import eu.dnetlib.common.profile.ProfileNotFoundException; +import eu.dnetlib.data.collective.transformation.rulelanguage.RuleLanguageParser; +import eu.dnetlib.data.collective.transformation.rulelanguage.Rules; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; +import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; +import eu.dnetlib.enabling.tools.ServiceLocator; + +/* Unit tests for TransformationRulesImportTool; the lookup service and its locator are Mockito mocks. */ @RunWith(MockitoJUnitRunner.class) +public class TransformationRulesImportToolTest { + // class under test + private transient TransformationRulesImportTool importTool; + @Mock + private transient ISLookUpService lookupService; + @Mock + private transient ServiceLocator lookupLocator; + + private transient RuleLanguageParser parser; + + /* Stubs the mocked locator to hand out the mocked lookup service and injects the locator into a fresh import tool. */ @Before + public void setUp(){ + when(lookupLocator.getService()).thenReturn(lookupService); + importTool = new TransformationRulesImportTool(); + importTool.setLookupServiceLocator(lookupLocator); + } + + /* A lookup result holding one entry makes getScript return a list of size 1. */ @Test + public void testGetScript() throws 
ISLookUpException, ProfileNotFoundException{ + List queryResult = new LinkedList(); + queryResult.add("script"); + when(lookupService.quickSearchProfile(anyString())).thenReturn(queryResult); + assertEquals(1, importTool.getScript("someId").size()); + } + + /* An empty lookup result is expected to make getScript throw ProfileNotFoundException. */ @Test(expected = ProfileNotFoundException.class) + public void testScriptNotFound() throws ISLookUpException, ProfileNotFoundException{ + List queryResult = new LinkedList(); + //queryResult.add("script"); + when(lookupService.quickSearchProfile(anyString())).thenReturn(queryResult); + assertEquals(0, importTool.getScript("someId").size()); + } + + /* The main script declares no import while the lookup result still lists one imported profile id; importRules is expected to throw IllegalStateException. secondQueryResult is built but never stubbed because the second when(...) is commented out. */ @Test(expected = IllegalStateException.class) + public void testImportWithInvalidNumberOfSubScripts() throws ISLookUpException, ProfileNotFoundException{ + parser = new RuleLanguageParser(); + List queryResult = new LinkedList(); + queryResult.add(getMainScriptNoSubscripts()); + queryResult.add("importedScriptProfileId-1"); + List secondQueryResult = new LinkedList(); + secondQueryResult.add(getSubScript_1()); + when(lookupService.quickSearchProfile(getXQuery("MainScriptProfileId"))).thenReturn(queryResult); + //when(lookupService.quickSearchProfile(getXQuery("importedScriptProfileId-1"))).thenReturn(secondQueryResult); + importTool.importRules(parser, "MainScriptProfileId"); + + } + + /* The main script imports sub1 and both lookups are stubbed; after importRules the parser is expected to hold one namespace declaration, no function calls and one element mapping rule, prefix:element, whose constant is SomeConstantText. */ @Test + public void testImportWithSubScripts() throws ISLookUpException, ProfileNotFoundException{ + parser = new RuleLanguageParser(); + List queryResult = new LinkedList(); + queryResult.add(getMainScriptWithSubscript()); + queryResult.add("importedScriptProfileId-1"); + List secondQueryResult = new LinkedList(); + secondQueryResult.add(getSubScript_1()); + when(lookupService.quickSearchProfile(getXQuery("MainScriptProfileId"))).thenReturn(queryResult); + when(lookupService.quickSearchProfile(getXQuery("importedScriptProfileId-1"))).thenReturn(secondQueryResult); + importTool.importRules(parser, "MainScriptProfileId"); + assertEquals(1, parser.getElementMappingRules().size()); + 
assertEquals(1, parser.getNamespaceDeclarations().size()); + assertEquals(0, parser.getFunctionCalls().size()); + assertTrue(parser.getElementMappingRules().containsKey("prefix:element")); + Rules rule = (Rules)parser.getElementMappingRules().get("prefix:element").iterator().next(); + assertEquals("SomeConstantText", rule.getConstant()); + } + + /* Builds the query string the lookup stubs are keyed on: the CODE child nodes of the profile with the given id, followed by the ids of its IMPORTED SCRIPT_REFERENCE entries. NOTE(review): presumably identical to the query built inside TransformationRulesImportTool; confirm against that class. */ private String getXQuery(String profileId){ + return "collection('/db/DRIVER/TransformationRuleDSResources')//RESOURCE_PROFILE[.//RESOURCE_IDENTIFIER/@value ='" + + profileId + "']//CODE/child::node(), " + + "for $id in (collection('/db/DRIVER/TransformationRuleDSResources')//RESOURCE_PROFILE[.//RESOURCE_IDENTIFIER/@value ='" + + profileId + "']//IMPORTED/SCRIPT_REFERENCE/@id) return string($id)"; + } + + /* Main script fixture named abc without an import statement (the import line is commented out). */ private String getMainScriptNoSubscripts(){ + StringBuilder builder = new StringBuilder(); + //builder.append("import (sub1);"); + builder.append("declare_script \"abc\";\r\n"); + builder.append("end"); + return builder.toString(); + } + /* Main script fixture named abc that imports sub1. */ private String getMainScriptWithSubscript(){ + StringBuilder builder = new StringBuilder(); + builder.append("declare_script \"abc\";\r\n"); + builder.append("import (sub1);"); + builder.append("end"); + return builder.toString(); + } + + /* Sub script fixture named sub1: declares the namespace prefix bound to someUri and one constant rule for prefix:element. */ private String getSubScript_1(){ + StringBuilder builder = new StringBuilder(); + builder.append("declare_script \"sub1\";\r\n"); + builder.append("declare_ns prefix = \"someUri\";\r\n"); + builder.append("prefix:element = \"SomeConstantText\";\r\n"); + builder.append("end"); + return builder.toString(); + } +} diff --git a/dnet-data-services/src/test/resources/lang_vocabulary.xml b/dnet-data-services/src/test/resources/lang_vocabulary.xml new file mode 100644 index 0000000..c75a8b4 --- /dev/null +++ b/dnet-data-services/src/test/resources/lang_vocabulary.xml @@ -0,0 +1,1608 @@ + + +
+ + + + + +
+ + + Names of Languages + ISO 639-2 list of languages. It defines mapping from iso639-1 and DRIVER derived values. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + String + +
diff --git a/dnet-data-services/src/test/resources/mainScript_example.ftl b/dnet-data-services/src/test/resources/mainScript_example.ftl new file mode 100644 index 0000000..d8d109d --- /dev/null +++ b/dnet-data-services/src/test/resources/mainScript_example.ftl @@ -0,0 +1,14 @@ +// declare the import of script(s) +import (subScript_example.ftl); + +// declare namespace(s) +declare_ns prefix=http://somenamespace.xyz.de; + +// declare the script name +declare_script MainSample; + +// set a rule of type function +lv = Convert(xpath:/dc.metadata.language,LangVocab); + +// mark the end of the script +end diff --git a/dnet-data-services/src/test/resources/sample_record_dmf.xml b/dnet-data-services/src/test/resources/sample_record_dmf.xml new file mode 100644 index 0000000..bc90145 --- /dev/null +++ b/dnet-data-services/src/test/resources/sample_record_dmf.xml @@ -0,0 +1,47 @@ + +
+ ee4acd27-ba26-40a9-be52-e9b74e5c4af2_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU=::oai:bieson.ub.uni-bielefeld.de:1522 + 2009-07-15T10:03:31Z + 6341f37d-eb3d-4aec-b2a9-f9dbe4a08f8d_TURTdG9yZURTUmVzb3VyY2VzL01EU3RvcmVEU1Jlc291cmNlVHlwZQ==::oai:bieson.ub.uni-bielefeld.de:1522 + + + ee4acd27-ba26-40a9-be52-e9b74e5c4af2_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU= + ee4acd27-ba26-40a9-be52-e9b74e5c4af2_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU=::oai:bieson.ub.uni-bielefeld.de:1522 + 2009-07-15T10:03:31Z + 6341f37d-eb3d-4aec-b2a9-f9dbe4a08f8d_TURTdG9yZURTUmVzb3VyY2VzL01EU3RvcmVEU1Jlc291cmNlVHlwZQ==::oai:bieson.ub.uni-bielefeld.de:1522 + oai:bieson.ub.uni-bielefeld.de:1522 +
+ + + Starmann, Julia + 2008-01-01 + Im Rahmen dieser Arbeit wurde die Funktion der beiden im Genom von A. thaliana existieren Genen XPO1A und XPO1B untersucht. Die Expression beider Gene ist in allen Geweben von A. thaliana detektierbar (AtGenExpress). Die Analyse einer T-DNA Insertionslinie für XPO1A erbrachte keinen morphologischen Unterschied im Vergleich zum Wildtyp Col-0. Für xpo1b T-DNA Insertionslinien wurden kürzere Internodien festgestellt im Vergleich zum Wildtyp, was ein Hinweis darauf sein könnte, dass Proteine und andere Faktoren, die in die Entwicklung und das Wachstum von A. thaliana involviert sind, potentielle Cargosubstrate von XPO1B sind. Die Generierung von xpo1a xpo1b Doppelmutanten (Genotyp aabb) blieb erfolglos. Lediglich die Herstellung von heterozygoten T-DNA Insertionspflanzen (Genotyp AaBb) und solchen die heterozygot für xpo1a und homozygot für xpo1b (Genotyp Aabb) war möglich. Pflanzen, die einen der beiden Genotypen besaßen, wiesen einen starken Defekt in der Entwicklung der Samen auf. Während Wildtyp Col-0 Pflanzen durchschnittlich 55 bis 60 Samen pro Schote produzierten, entwickelten Pflanzen mit den Genotypen AaBb bzw. Aabb nur etwa zehn bis 15 Samen pro Schote aus. Einhergehend mit der Entwicklung einer deutlich reduzierten Anzahl an Samen waren die Schoten dieser Pflanzen mit acht bis zehn mm zusätzlich deutlich kleiner als solche des Wildtyps Col-0, die etwa 14 bis 16 mm lang waren. Eine homozygote T-DNA Insertion in beiden Genen xpo1a und xpo1b ist für die Pflanzen anscheinend gametophytisch letal. Der Kernexport ribosomaler Untereinheiten, aber auch der von mRNA in A. thaliana ist noch weitgehend unklar. Eine Interaktion von XPO1A mit dem NMD3, einem potenziellen Adapter zwischen XPO1 und der 60S ribosomalen Untereinheit, wurde nachgewiesen. Ein Rezeptor für den mRNA Kernexport, wie er z.B. in Hefen existiert, wurde noch nicht beschrieben. 
Als XPO1-interagierende Proteine wurden die Transkriptionsfaktoren MYB101, HSF3 und das Zinkfingerprotein AtCTH identifiziert. Das Kernexportsignal NES, ein meist Leucin-reiches Motiv, wurde in diesen transkriptionalen Regulatoren durch Deletions- und Mutationsanalyse charakterisiert. Die im Rahmen dieser Arbeit identifizierten NES dienen einer weiteren Arbeit am Lehrstuhl für Genomforschung als Grundlage zur Etablierung eines bioinformatisches Programms zur Identifizierung von NES in pflanzlichen Proteinen. MYB101 gehört zur Familie der so genannten GAMYB Transkriptionfaktoren, die durch Gibberellin gesteuert, die Expression von Zielgenen induzieren können. Ein Zielgen und die damit verbundene genaue Funktion von MYB101 sind noch unbekannt. Eine Bindung von MYB101 an das GARE Motive (gibberellic acid response element) im LEAFY Promotor wurde im Rahmen dieser Arbeit nachgewiesen. Zur Familie der GAMYBs gehören auch MYB33 und MYB65. Die Generierung von myb33 myb101, myb65 myb101 und myb33 myb65 myb101 T-DNA Insertionslinien verdeutlichte eine Funktion von MYB101 in der Entwicklung der Staubblätter und Pollen in A. thaliana. Diese Gewebe sind hauptsächlich die Orte, an denen die Expression von MYB101 in Microarray-Experimenten nachgewiesen wurde. RNA-bindende Proteine zählen ebenfalls zu den XPO1-interagierenden Proteinen. Für die hier untersuchten RNA-bindenden Proteine, CID11 und PAB2, wurde jeweils eine NES charakterisiert und eine XPO1-abhängige in vivo Lokalisierung in BY-2 Tabakprotoplasten wurde mittels Leptomycin B (LMB) nachgewiesen. Die RNA-bindenden Proteine CID9, CID10 und CID12 verändern ebenfalls ihre in vivo Lokalisierung unter dem Einfluss von LMB. Die Aufgabe von XPO1 beim Kernexport von RNA in A. thaliana wurde im Rahmen dieser Arbeit nicht abschließend geklärt. 
+ http://bieson.ub.uni-bielefeld.de/volltexte/2009/1522/ + deu/ger + Lehrstuhl für Genomforschung -- Fakultät für Biologie + Universität Bielefeld + + + Proteinimport + RN + Exportin1 + Proteinexport + Nukleozytoplasmatisch + Life sciences, biology + Nukleozytoplasmatische Partitionierung von Transkriptionsfaktoren und RNA-bindenden Proteinen in Arabidopsis Thaliana + 0006 + + + urn:nbn:de:hbz:361-15222 + oai_dc + OAI + Textual + DE + + http://bieson.ub.uni-bielefeld.de/ + BieSOn - Bielefelder Server fuer Online-Publikationen + +
+ + diff --git a/dnet-data-services/src/test/resources/subScript_example.ftl b/dnet-data-services/src/test/resources/subScript_example.ftl new file mode 100644 index 0000000..74ac7f9 --- /dev/null +++ b/dnet-data-services/src/test/resources/subScript_example.ftl @@ -0,0 +1,10 @@ +// declares the script name +declare_script SubSample; + +// set rules of type constants +CobjTypology = TEXT; +CobjModel = OAI-PMH; +CobjMDformats = oai_dc; + +// mark the end of the script +end \ No newline at end of file diff --git a/dnet-data-services/src/test/resources/trds_sample.xml b/dnet-data-services/src/test/resources/trds_sample.xml new file mode 100644 index 0000000..3c8132b --- /dev/null +++ b/dnet-data-services/src/test/resources/trds_sample.xml @@ -0,0 +1,38 @@ + + +
+ + + + + +
+ + + + SubSample + + + + + SECURITY_PARAMETERS + +
diff --git a/dnet-data-services/src/test/resources/trds_sample2.xml b/dnet-data-services/src/test/resources/trds_sample2.xml new file mode 100644 index 0000000..0046f49 --- /dev/null +++ b/dnet-data-services/src/test/resources/trds_sample2.xml @@ -0,0 +1,32 @@ + + +
+ + + + + +
+ + + + + + + SECURITY_PARAMETERS + +
diff --git a/dnet-data-services/src/test/resources/type_vocabulary.xml b/dnet-data-services/src/test/resources/type_vocabulary.xml new file mode 100644 index 0000000..24a0f11 --- /dev/null +++ b/dnet-data-services/src/test/resources/type_vocabulary.xml @@ -0,0 +1,112 @@ + + +
+ + + + + +
+ + + Names of Text Object Typologies + List of typology types provided by the DRIVER Guidelines for Repository content + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
diff --git a/pom.xml b/pom.xml index 4bdc6dd..0348614 100644 --- a/pom.xml +++ b/pom.xml @@ -123,6 +123,16 @@ dom4j 1.6.1 + + com.sun.xsom + xsom + 20110809 + + + org.svenson + svenson-json + [1.4.0,1.5.0) + jaxen jaxen