package org.gcube.dataanalysis.dataminer.poolmanager.process; import java.io.IOException; import java.io.InputStream; import java.net.URL; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Vector; import java.util.zip.ZipEntry; import java.util.zip.ZipInputStream; import org.gcube.dataanalysis.dataminer.poolmanager.datamodel.Algorithm; import org.gcube.dataanalysis.dataminer.poolmanager.datamodel.Dependency; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class AlgorithmPackageParser { /** * The name of the file containing algorithm metadata. Expected in the root * directory of the package. */ private final String METADATA_FILE_NAME = "Info.txt", METADATA_USERNAME = "Username", METADATA_FULLNAME = "Full Name", METADATA_EMAIL = "Email", METADATA_LANGUAGE = "Language", METADATA_CATEGORY = "Algorithm Category", METADATA_ALGORITHM_NAME = "Algorithm Name", METADATA_ALGORITHM_DESCRIPTION = "Algorithm Description", METADATA_CLASS_NAME = "Class Name", // private static final String METADATA_PACKAGES = "Packages"; METADATA_KEY_VALUE_SEPARATOR = ":"; private final int BUFFER_SIZE = 4096; private Logger logger; public AlgorithmPackageParser() { this.logger = LoggerFactory.getLogger(AlgorithmPackageParser.class); } public Algorithm parsePackage(String url) throws IOException { String packageMetadata = this.getPackageMetadata(url); if (packageMetadata == null) { this.logger.warn("WARNING: No metadata found for " + url); return null; } else { Map> parsedMetadata = this.parseMetadata(packageMetadata); Algorithm a = this.createAlgorithm(parsedMetadata); a.setPackageURL(url); return a; } } private String getPackageMetadata(String url) throws IOException { InputStream is = new URL(url).openStream(); ZipInputStream zipIs = new ZipInputStream(is); ZipEntry entry = zipIs.getNextEntry(); String out = null; while (entry != null) { if (METADATA_FILE_NAME.equalsIgnoreCase(entry.getName())) { out = this.getEntryContent(zipIs); break; } entry = zipIs.getNextEntry(); } is.close(); zipIs.close(); return out; } private String getEntryContent(ZipInputStream zipIn) throws IOException { StringBuilder s = new StringBuilder(); byte[] buffer = new byte[BUFFER_SIZE]; int read = 0; while ((read = zipIn.read(buffer)) != -1) { s.append(new String(buffer, 0, read)); } return s.toString(); } private Map> parseMetadata(String metadata) { Map> out = new HashMap>(); String[] lines = metadata.split("\n"); String key = null; String value = null; for (String line : lines) { // skip empty lines if (line.trim().isEmpty()) { continue; } // scan lines one by one, looking for key and values String[] parts = line.split(METADATA_KEY_VALUE_SEPARATOR); if (parts.length > 1) { // key and value on the same line key = parts[0].trim(); value = line.substring(parts[0].length() + 1).trim(); } else if (parts.length == 1) { // either a key or a value if (line.trim().endsWith(METADATA_KEY_VALUE_SEPARATOR)) { // key key = parts[0].trim(); value = null; } else { // value value = line.trim(); } } // add key+value to the map if (key != null && value != null) { List values = out.get(key); if (values == null) { values = new Vector<>(); out.put(key, values); } values.add(value); this.logger.debug(key + METADATA_KEY_VALUE_SEPARATOR + " " + values); } } return out; } // private Algorithm createAlgorithm(Map> metadata) { // Algorithm out = new Algorithm(); // out.setName(extractSingleValue(metadata, METADATA_ALGORITHM_NAME)); // out.setDescription(extractSingleValue(metadata, METADATA_ALGORITHM_DESCRIPTION)); // out.setClazz(extractSingleValue(metadata, METADATA_CLASS_NAME)); // List dependencies = extractMultipleValues(metadata, METADATA_PACKAGES); // if (dependencies != null) { // for (String pkg : dependencies) { // Dependency dep = new Dependency(); // dep.setName(pkg); // dep.setType("os"); // out.addDependency(dep); // } // } // return out; // } private Algorithm createAlgorithm(Map> metadata) { Algorithm out = new Algorithm(); out.setName(extractSingleValue(metadata, METADATA_ALGORITHM_NAME)); out.setDescription(extractSingleValue(metadata, METADATA_ALGORITHM_DESCRIPTION)); out.setClazz(extractSingleValue(metadata, METADATA_CLASS_NAME)); out.setEmail(extractSingleValue(metadata, METADATA_EMAIL)); out.setFullname(extractSingleValue(metadata, METADATA_FULLNAME)); out.setUsername(extractSingleValue(metadata, METADATA_USERNAME)); out.setLanguage(extractSingleValue(metadata, METADATA_LANGUAGE)); out.setCategory(extractSingleValue(metadata, METADATA_CATEGORY)); List dependencies = extractMultipleValues(metadata, "Package Name"); if (dependencies != null) { for (String pkg : dependencies) { Dependency dep = new Dependency(); dep.setName(pkg); out.addDependency(dep); } } // List rdependencies = extractMultipleValues(metadata, "cran"); // if (rdependencies != null) { // for (String pkg : rdependencies) { // Dependency dep = new Dependency(); // // //if (pkg.startsWith("os:")){ // dep.setName(pkg); // dep.setType("cran"); // out.addDependency(dep); // } // } // // // List defdependencies = extractMultipleValues(metadata, "Packages"); // if (defdependencies != null) { // for (String pkg : defdependencies) { // Dependency dep = new Dependency(); // // //if (pkg.startsWith("os:")){ // dep.setName(pkg); // dep.setType("os"); // out.addDependency(dep); // } // } // // List osdependencies = extractMultipleValues(metadata, "os"); // if (osdependencies != null) { // for (String pkg : osdependencies) { // Dependency dep = new Dependency(); // // //if (pkg.startsWith("os:")){ // dep.setName(pkg); // dep.setType("os"); // out.addDependency(dep); // } // } // // // // List gitdependencies = extractMultipleValues(metadata, "github"); // if (gitdependencies != null) { // for (String pkg : gitdependencies) { // Dependency dep = new Dependency(); // // //if (pkg.startsWith("os:")){ // dep.setName(pkg); // dep.setType("github"); // out.addDependency(dep); // } // } // // // // List cdependencies = extractMultipleValues(metadata, "custom"); // if (cdependencies != null) { // for (String pkg : cdependencies) { // Dependency dep = new Dependency(); // // //if (pkg.startsWith("os:")){ // dep.setName(pkg); // dep.setType("custom"); // out.addDependency(dep); // } // } // if (pkg.startsWith("r:")){ // //String results = StringEscapeUtils.escapeJava(pkg); // dep.setName(pkg); // dep.setType("cran"); // } // if (pkg.startsWith("custom:")){ // dep.setName(pkg); // dep.setType("custom"); // } // if (!pkg.startsWith("os:")&&!pkg.startsWith("r:")&&!pkg.startsWith("custom:")){ // dep.setName(pkg); // dep.setType("os"); // } return out; } private static String extractSingleValue(Map> metadata, String key) { List l = metadata.get(key); if (l != null && l.size() == 1) { return l.get(0); } else { return null; } } private static List extractMultipleValues( Map> metadata, String key) { List l = metadata.get(key); if (l != null) { return new Vector<>(l); } else { return null; } } public static void main(String[] args) { AlgorithmPackageParser ap = new AlgorithmPackageParser(); String txt = "Username: giancarlo.panichi\n"+ "Full Name: Giancarlo Panichi\n"+ "Email: g.panichi@isti.cnr.it\n"+ "Language: R\n"+ "Algorithm Name: RBLACKBOX\n"+ "Class Name: org.gcube.dataanalysis.executor.rscripts.RBlackBox\n"+ "Algorithm Description: RBlackBox\n"+ "Algorithm Category: BLACK_BOX\n"+ "Interpreter Version: 3.2.1\n"+ "Packages:\n"+ "Package Name: DBI\n"+ "Package Name: RPostgreSQL\n"+ "Package Name: raster\n"+ "Package Name: maptools\n"+ "Package Name: sqldf\n"+ "Package Name: RJSONIO\n"+ "Package Name: httr \n"+ "Package Name: data.table"; ap.parseMetadata(txt); } }