package org.gcube.dataanalysis.executor.nodes.transducers.bionym.abstracts; import java.io.File; import java.security.Permission; import java.util.HashMap; import java.util.UUID; import org.gcube.contentmanagement.lexicalmatcher.utils.AnalysisLogger; import org.gcube.dataanalysis.executor.nodes.transducers.bionym.implementations.matchers.LevensteinMatcher; import org.gcube.dataanalysis.executor.nodes.transducers.bionym.interfaces.Matcher; import org.gcube.dataanalysis.executor.nodes.transducers.bionym.utils.YasmeenFileTools; import org.gcube.dataanalysis.executor.nodes.transducers.bionym.utils.YasmeenGlobalParameters; public abstract class YasmeenMatcher implements Matcher { public static String matcherLib = "YASMEEN-matcher-1.2.0.1.jar"; protected String sandBoxFolder = "./"; public double threshold = 0.4; public int maxResults = 10; public HashMap parameters; HashMap urlsmap = createReferenceMap(); static HashMap performancemap = createPerformanceMap(); protected static HashMap createReferenceMap() { HashMap urlsMaps = new HashMap(); urlsMaps.put(YasmeenGlobalParameters.BuiltinDataSources.ASFIS.name(), "https://data.d4science.org/shub/E_Z3g0Vm9MMDZBYld4MEMySUdET0hzWnd5R1NYUG1PcXZaU21DU2tLd0xmaCtBWk9sWHUzVmNJMTNhblBXV0F0VQ=="); urlsMaps.put(YasmeenGlobalParameters.BuiltinDataSources.FISHBASE.name(), "https://data.d4science.org/shub/E_c0pLQ2ZSZGd2ZFNLTFozNWM1eEZvc25CdFdibysvVVd1dVZDb3p1T0ZJb1pJT3g5YXNFY0lmbUFHMVFHaXdwZA=="); urlsMaps.put(YasmeenGlobalParameters.BuiltinDataSources.OBIS.name(), "https://data.d4science.org/shub/E_UFdIK2IxOEtoODAyTUNxY1BjTGJNclpQMDhkaEdHYXVNMUxmK3B2OFlNNDE3OWg3d0d1cm5JSHhCTDNYaGhSNw=="); urlsMaps.put(YasmeenGlobalParameters.BuiltinDataSources.OBIS_ANIMALIA.name(), "https://data.d4science.org/shub/E_ZUExS3FlU25NTURycXRUODVkZXdMZWJsR1dYMVpnVW42RDNHTHNkaGJtZjFaL2VGQklkTGUrUVdVa05uWnVZag=="); urlsMaps.put(YasmeenGlobalParameters.BuiltinDataSources.OBIS_CNIDARIA.name(), "https://data.d4science.org/shub/E_SHBjWnp1TTcrblVrMHJqZDZTTlN1bGJKVWxJV0FKUC8wYVpRZkFPMkZLbWJUMHlSRjBCQjNSVjFjalI0UWx5cQ=="); urlsMaps.put(YasmeenGlobalParameters.BuiltinDataSources.OBIS_ECHINODERMATA.name(), "https://data.d4science.org/shub/E_R1ZYbEdHKzBtYlBPbzB6T3NWRDgrdlNHbWlPdHJRZVJDUmtEVitLQk11Unl4ZkE4aGhxNTFGZWxTV3lkZm9kOA=="); urlsMaps.put(YasmeenGlobalParameters.BuiltinDataSources.OBIS_PLATYHELMINTHES.name(), "https://data.d4science.org/shub/E_R25wUVFRSGJwK0xJL1MzZFdmeWNzZlNKNFFKQXRwL0JrZXBjYTJvRnE0a2Z2dm16L2VqZlI4ZUd0bldJV3h1Uw=="); urlsMaps.put(YasmeenGlobalParameters.BuiltinDataSources.COL_FULL.name(), "https://data.d4science.org/shub/E_NUJlVVhKak9NeWpIekdFdUU1Ty96UHA2QkpCZ0xISVNrdnN4VlBvRnRQZnhLcW1CUXVVMGMzbU5Dekpkcm5HaA=="); urlsMaps.put(YasmeenGlobalParameters.BuiltinDataSources.COL_CHORDATA.name(), "https://data.d4science.org/shub/E_L3hTNzdrWFRQLzJYaVRGR0ZhWVhwSmluNFNvdkNKN3lWNEQ1Z0orZXBqSUdWYUJPaTFPMXdZWW1LYzg0UUtHUQ=="); urlsMaps.put(YasmeenGlobalParameters.BuiltinDataSources.COL_MAMMALIA.name(), "https://data.d4science.org/shub/E_dHh4akNxazViQUVNTFNDd0E4RldnTDVlSUZ5b1lLdDluKzV2R21sSHZWTXBIaGVKbDNnZmUrNWVtUjZZUmthaA=="); urlsMaps.put(YasmeenGlobalParameters.BuiltinDataSources.IRMNG_ACTINOPTERYGII.name(), "https://data.d4science.org/shub/E_bzZtY2xUeFZNSTA1WThMQjNLWWNrc1piSjRPZnk5REUrTUt6bVU3QjRIRWorUTBJUWZPNEdHOWdSYyt0RGFLVg=="); urlsMaps.put(YasmeenGlobalParameters.BuiltinDataSources.WORMS_ANIMALIA.name(), "https://data.d4science.org/shub/E_czVkaEtpeGxJTHJ5ZUNJanZBMzJmb3Q2VFhyNkZCZzlNTTk4KzRMcFQ5UlcrQlU0NEFJVEwzNmIxVmR2Vkt1YQ=="); urlsMaps.put(YasmeenGlobalParameters.BuiltinDataSources.WORMS_PISCES.name(), "https://data.d4science.org/shub/E_Ylg2azNMMTBPOFIwNlRwMFZVSUM2WlFOSGZZUll0MVk4OXI2bmYrM2Y2OGlLYk5DZ3lqTlVPRUNKcnpITG1aRw=="); return urlsMaps; } protected HashMap createReferenceMapFile(String localFolder) { HashMap urlsMaps = new HashMap(); urlsMaps.put(YasmeenGlobalParameters.BuiltinDataSources.ASFIS.name(), new File(localFolder, "ASFIS_taxa.taf").getAbsolutePath()); urlsMaps.put(YasmeenGlobalParameters.BuiltinDataSources.FISHBASE.name(), new File(localFolder, "FISHBASE_taxa.taf").getAbsolutePath()); urlsMaps.put(YasmeenGlobalParameters.BuiltinDataSources.OBIS.name(), new File(localFolder, "OBIS_taxa.taf").getAbsolutePath()); urlsMaps.put(YasmeenGlobalParameters.BuiltinDataSources.OBIS_ANIMALIA.name(), new File(localFolder, "OBIS_Animalia_taxa.taf").getAbsolutePath()); urlsMaps.put(YasmeenGlobalParameters.BuiltinDataSources.OBIS_CNIDARIA.name(), new File(localFolder, "OBIS_Cnidaria_taxa.taf").getAbsolutePath()); urlsMaps.put(YasmeenGlobalParameters.BuiltinDataSources.OBIS_ECHINODERMATA.name(), new File(localFolder, "OBIS_Echinodermata_taxa.taf").getAbsolutePath()); urlsMaps.put(YasmeenGlobalParameters.BuiltinDataSources.OBIS_PLATYHELMINTHES.name(), new File(localFolder, "OBIS_Platyhelminthes_taxa.taf").getAbsolutePath()); urlsMaps.put(YasmeenGlobalParameters.BuiltinDataSources.COL_FULL.name(), new File(localFolder, "COL_FULL_taxa.taf").getAbsolutePath()); urlsMaps.put(YasmeenGlobalParameters.BuiltinDataSources.COL_CHORDATA.name(), new File(localFolder, "COL_Chordata_taxa.taf").getAbsolutePath()); urlsMaps.put(YasmeenGlobalParameters.BuiltinDataSources.COL_MAMMALIA.name(), new File(localFolder, "COL_Mammalia_taxa.taf").getAbsolutePath()); urlsMaps.put(YasmeenGlobalParameters.BuiltinDataSources.IRMNG_ACTINOPTERYGII.name(), new File(localFolder, "IRMNG_Actinopterygii_taxa.taf").getAbsolutePath()); urlsMaps.put(YasmeenGlobalParameters.BuiltinDataSources.WORMS_ANIMALIA.name(), new File(localFolder, "WORMS_Animalia_taxa.taf").getAbsolutePath()); urlsMaps.put(YasmeenGlobalParameters.BuiltinDataSources.WORMS_PISCES.name(), new File(localFolder, "WoRMS_Pisces_taxa.taf").getAbsolutePath()); return urlsMaps; } protected static HashMap createPerformanceMap() { HashMap performanceMap = new HashMap(); performanceMap.put(YasmeenGlobalParameters.Performance.MAX_ACCURACY.name(), ""); performanceMap.put(YasmeenGlobalParameters.Performance.LOW_SPEED.name(), "-dpt 10"); performanceMap.put(YasmeenGlobalParameters.Performance.MEDIUM_SPEED.name(), "-dpt 6"); performanceMap.put(YasmeenGlobalParameters.Performance.HIGH_SPEED.name(), "-dpt 2"); performanceMap.put(YasmeenGlobalParameters.Performance.MAX_SPEED.name(), "-dpt 0"); return performanceMap; } protected abstract String getMatchlets(); protected abstract String getLexicalDistancesWeights(); protected abstract String getStemming(); public YasmeenMatcher(String sandboxfolder, double threshold, int maxResults, HashMap parameters) { init(sandboxfolder, threshold, maxResults, parameters); } @Override public void init(String sandboxfolder, double threshold, int maxResults, HashMap parameters) { this.sandBoxFolder = sandboxfolder; this.parameters = parameters; this.threshold = threshold; this.maxResults = maxResults; } @Override public MatcherOutput match(MatcherInput inputName) throws Exception { String inputFileName = parameters.get(YasmeenGlobalParameters.parserOutputFileParam); String uuid = ("" + UUID.randomUUID()).replace("-", ""); String outputFileName = "matcherOutput" + uuid + ".csv"; File outfile = new File(sandBoxFolder, outputFileName); if (outfile.exists()) { outfile.delete(); } // String outputFileName = parameters.get(YasmeenGlobalParameters.parserOutputFileParam); String taxaAuthorityFile = parameters.get(YasmeenGlobalParameters.taxaAuthorityFileParam); String performanceMap = parameters.get(YasmeenGlobalParameters.performanceParam); String performanceCommand = performancemap.get(performanceMap); if (performanceCommand == null) performanceCommand = ""; boolean stemmit = Boolean.parseBoolean(parameters.get(YasmeenGlobalParameters.useStemmedGenusAndSpecies)); String normalizeMatchlets = ""; String standardMatchlets = ""; if (stemmit) normalizeMatchlets = getStemming(); else standardMatchlets = getMatchlets(); /* ENABLE LOCAL FILES USAGE */ /* * String localFilesFolder = parameters.get(YasmeenGlobalParameters.staticFilesFolderParam); if (localFilesFolder==null){ localFilesFolder=""; urlsmap=createReferenceMap(); } else urlsmap=createReferenceMapFile(localFilesFolder); */ // java -jar YASMEEN-matcher-1.1.1.jar -inFile output.csv -refData ASFIS@https://dl.dropboxusercontent.com/u/12809149/ASFIS_taxa.taf.gz -outFile matchout.csv -man -may -mSn -mt -mc 10 -ps -pt 1.5x -xml -xslTemplate csv -law 90:30:60 // YasmeenFileTools.writeYasmeenMatcherInput(sandBoxFolder, inputFileName, parser, inputName); // org.fao.fi.comet.domain.species.tools.process.matching.cli.MatchingEngine String localFilesFolder = parameters.get(YasmeenGlobalParameters.staticFilesFolderParam); if ((localFilesFolder != null) && (localFilesFolder.length() > 0) && (taxaAuthorityFile.equals(YasmeenGlobalParameters.BuiltinDataSources.FISHBASE.name())) && (this instanceof LevensteinMatcher) && new File(createReferenceMapFile(localFilesFolder).get(taxaAuthorityFile)).exists()) { RealTimeMatcher rm = new RealTimeMatcher(); HashMap filesMap = createReferenceMapFile(localFilesFolder); rm.match(filesMap.get(taxaAuthorityFile), taxaAuthorityFile, new File(sandBoxFolder, inputFileName).getAbsolutePath(), outfile.getAbsolutePath(), threshold, maxResults); } else { String execution = "java -Xmx512m -Xmx1024m -jar " + new File(sandBoxFolder, matcherLib).getAbsolutePath() + " -inFile " + new File(sandBoxFolder, inputFileName).getAbsolutePath() + " -refData " + taxaAuthorityFile + "@" + urlsmap.get(taxaAuthorityFile) + " -outFile " + outfile.getAbsolutePath() + " " + normalizeMatchlets + " -mc " + maxResults + " -mst " + threshold + " -pt 1x -xml -xslTemplate csv" + " " + getLexicalDistancesWeights() + " " + standardMatchlets + " " + performanceCommand; AnalysisLogger.getLogger().debug(execution); YasmeenFileTools.callYasmeen(execution.trim()); } MatcherOutput mo = YasmeenFileTools.getYasmeenMatcherOutput(sandBoxFolder, outputFileName); outfile.delete(); return mo; } private static class NoExitSecurityManager extends SecurityManager { @Override public void checkPermission(Permission perm) { // allow anything. } @Override public void checkPermission(Permission perm, Object context) { // allow anything. } @Override public void checkExit(int status) { super.checkExit(status); throw new RuntimeException("exit status is "+status); } } }