// dataminer-pool-manager/src/main/java/org/gcube/dataanalysis/dataminer/poolmanager/process/AlgorithmPackageParser.java
// 318 lines, 11 KiB, Java, executable file

package org.gcube.dataanalysis.dataminer.poolmanager.process;
***REMOVED***
import java.io.InputStream;
***REMOVED***
import java.util.HashMap;
***REMOVED***
import java.util.Map;
import java.util.Vector;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
***REMOVED***
import org.gcube.dataanalysis.dataminer.poolmanager.datamodel.Dependency;
***REMOVED***
***REMOVED***
public class AlgorithmPackageParser ***REMOVED***
/**
* The name of the file containing algorithm metadata. Expected in the root
* directory of the package.
*/
private final String METADATA_FILE_NAME = "Info.txt",
METADATA_USERNAME = "Username",
METADATA_FULLNAME = "Full Name",
METADATA_EMAIL = "Email",
METADATA_LANGUAGE = "Language",
METADATA_CATEGORY = "Algorithm Category",
METADATA_ALGORITHM_NAME = "Algorithm Name",
METADATA_ALGORITHM_DESCRIPTION = "Algorithm Description",
METADATA_CLASS_NAME = "Class Name",
***REMOVED*** private static final String METADATA_PACKAGES = "Packages";
METADATA_KEY_VALUE_SEPARATOR = ":";
private final int BUFFER_SIZE = 4096;
private Logger logger;
public AlgorithmPackageParser() ***REMOVED***
this.logger = LoggerFactory.getLogger(AlgorithmPackageParser.class);
***REMOVED***
public Algorithm parsePackage(String url) throws IOException ***REMOVED***
String packageMetadata = this.getPackageMetadata(url);
if (packageMetadata == null) ***REMOVED***
this.logger.warn("WARNING: No metadata found for " + url);
return null;
***REMOVED*** else ***REMOVED***
Map<String, List<String>> parsedMetadata = this.parseMetadata(packageMetadata);
Algorithm a = this.createAlgorithm(parsedMetadata);
a.setPackageURL(url);
return a;
***REMOVED***
***REMOVED***
private String getPackageMetadata(String url) throws IOException ***REMOVED***
InputStream is = new URL(url).openStream();
ZipInputStream zipIs = new ZipInputStream(is);
ZipEntry entry = zipIs.getNextEntry();
String out = null;
while (entry != null) ***REMOVED***
if (METADATA_FILE_NAME.equalsIgnoreCase(entry.getName())) ***REMOVED***
out = this.getEntryContent(zipIs);
break;
***REMOVED***
entry = zipIs.getNextEntry();
***REMOVED***
is.close();
zipIs.close();
return out;
***REMOVED***
private String getEntryContent(ZipInputStream zipIn) throws IOException ***REMOVED***
StringBuilder s = new StringBuilder();
byte[] buffer = new byte[BUFFER_SIZE];
int read = 0;
while ((read = zipIn.read(buffer)) != -1) ***REMOVED***
s.append(new String(buffer, 0, read));
***REMOVED***
return s.toString();
***REMOVED***
private Map<String, List<String>> parseMetadata(String metadata) ***REMOVED***
Map<String, List<String>> out = new HashMap<String, List<String>>();
String[] lines = metadata.split("\n");
String key = null;
String value = null;
for (String line : lines) ***REMOVED***
***REMOVED*** skip empty lines
if (line.trim().isEmpty()) ***REMOVED***
continue;
***REMOVED***
***REMOVED*** scan lines one by one, looking for key and values
String[] parts = line.split(METADATA_KEY_VALUE_SEPARATOR);
if (parts.length > 1) ***REMOVED***
***REMOVED*** key and value on the same line
key = parts[0].trim();
value = line.substring(parts[0].length() + 1).trim();
***REMOVED*** else if (parts.length == 1) ***REMOVED***
***REMOVED*** either a key or a value
if (line.trim().endsWith(METADATA_KEY_VALUE_SEPARATOR)) ***REMOVED***
***REMOVED*** key
key = parts[0].trim();
value = null;
***REMOVED*** else ***REMOVED***
***REMOVED*** value
value = line.trim();
***REMOVED***
***REMOVED***
***REMOVED*** add key+value to the map
if (key != null && value != null) ***REMOVED***
List<String> values = out.get(key);
if (values == null) ***REMOVED***
values = new Vector<>();
out.put(key, values);
***REMOVED***
values.add(value);
this.logger.debug(key + METADATA_KEY_VALUE_SEPARATOR + " " + values);
***REMOVED***
***REMOVED***
return out;
***REMOVED***
***REMOVED*** private Algorithm createAlgorithm(Map<String, List<String>> metadata) ***REMOVED***
***REMOVED*** Algorithm out = new Algorithm();
***REMOVED*** out.setName(extractSingleValue(metadata, METADATA_ALGORITHM_NAME));
***REMOVED*** out.setDescription(extractSingleValue(metadata, METADATA_ALGORITHM_DESCRIPTION));
***REMOVED*** out.setClazz(extractSingleValue(metadata, METADATA_CLASS_NAME));
***REMOVED*** List<String> dependencies = extractMultipleValues(metadata, METADATA_PACKAGES);
***REMOVED*** if (dependencies != null) ***REMOVED***
***REMOVED*** for (String pkg : dependencies) ***REMOVED***
***REMOVED*** Dependency dep = new Dependency();
***REMOVED*** dep.setName(pkg);
***REMOVED*** dep.setType("os");
***REMOVED*** out.addDependency(dep);
***REMOVED*** ***REMOVED***
***REMOVED*** ***REMOVED***
***REMOVED*** return out;
***REMOVED*** ***REMOVED***
private Algorithm createAlgorithm(Map<String, List<String>> metadata) ***REMOVED***
Algorithm out = new Algorithm();
out.setName(extractSingleValue(metadata, METADATA_ALGORITHM_NAME));
out.setDescription(extractSingleValue(metadata, METADATA_ALGORITHM_DESCRIPTION));
out.setClazz(extractSingleValue(metadata, METADATA_CLASS_NAME));
out.setEmail(extractSingleValue(metadata, METADATA_EMAIL));
out.setFullname(extractSingleValue(metadata, METADATA_FULLNAME));
out.setUsername(extractSingleValue(metadata, METADATA_USERNAME));
out.setLanguage(extractSingleValue(metadata, METADATA_LANGUAGE));
out.setCategory(extractSingleValue(metadata, METADATA_CATEGORY));
List<String> dependencies = extractMultipleValues(metadata, "Package Name");
if (dependencies != null) ***REMOVED***
for (String pkg : dependencies) ***REMOVED***
Dependency dep = new Dependency();
dep.setName(pkg);
out.addDependency(dep);
***REMOVED***
***REMOVED***
***REMOVED*** List<String> rdependencies = extractMultipleValues(metadata, "cran");
***REMOVED*** if (rdependencies != null) ***REMOVED***
***REMOVED*** for (String pkg : rdependencies) ***REMOVED***
***REMOVED*** Dependency dep = new Dependency();
***REMOVED***
***REMOVED*** ***REMOVED***if (pkg.startsWith("os:"))***REMOVED***
***REMOVED*** dep.setName(pkg);
***REMOVED*** dep.setType("cran");
***REMOVED*** out.addDependency(dep);
***REMOVED*** ***REMOVED***
***REMOVED*** ***REMOVED***
***REMOVED***
***REMOVED***
***REMOVED*** List<String> defdependencies = extractMultipleValues(metadata, "Packages");
***REMOVED*** if (defdependencies != null) ***REMOVED***
***REMOVED*** for (String pkg : defdependencies) ***REMOVED***
***REMOVED*** Dependency dep = new Dependency();
***REMOVED***
***REMOVED*** ***REMOVED***if (pkg.startsWith("os:"))***REMOVED***
***REMOVED*** dep.setName(pkg);
***REMOVED*** dep.setType("os");
***REMOVED*** out.addDependency(dep);
***REMOVED*** ***REMOVED***
***REMOVED*** ***REMOVED***
***REMOVED***
***REMOVED*** List<String> osdependencies = extractMultipleValues(metadata, "os");
***REMOVED*** if (osdependencies != null) ***REMOVED***
***REMOVED*** for (String pkg : osdependencies) ***REMOVED***
***REMOVED*** Dependency dep = new Dependency();
***REMOVED***
***REMOVED*** ***REMOVED***if (pkg.startsWith("os:"))***REMOVED***
***REMOVED*** dep.setName(pkg);
***REMOVED*** dep.setType("os");
***REMOVED*** out.addDependency(dep);
***REMOVED*** ***REMOVED***
***REMOVED*** ***REMOVED***
***REMOVED***
***REMOVED***
***REMOVED***
***REMOVED*** List<String> gitdependencies = extractMultipleValues(metadata, "github");
***REMOVED*** if (gitdependencies != null) ***REMOVED***
***REMOVED*** for (String pkg : gitdependencies) ***REMOVED***
***REMOVED*** Dependency dep = new Dependency();
***REMOVED***
***REMOVED*** ***REMOVED***if (pkg.startsWith("os:"))***REMOVED***
***REMOVED*** dep.setName(pkg);
***REMOVED*** dep.setType("github");
***REMOVED*** out.addDependency(dep);
***REMOVED*** ***REMOVED***
***REMOVED*** ***REMOVED***
***REMOVED***
***REMOVED***
***REMOVED***
***REMOVED*** List<String> cdependencies = extractMultipleValues(metadata, "custom");
***REMOVED*** if (cdependencies != null) ***REMOVED***
***REMOVED*** for (String pkg : cdependencies) ***REMOVED***
***REMOVED*** Dependency dep = new Dependency();
***REMOVED***
***REMOVED*** ***REMOVED***if (pkg.startsWith("os:"))***REMOVED***
***REMOVED*** dep.setName(pkg);
***REMOVED*** dep.setType("custom");
***REMOVED*** out.addDependency(dep);
***REMOVED*** ***REMOVED***
***REMOVED*** ***REMOVED***
***REMOVED*** if (pkg.startsWith("r:"))***REMOVED***
***REMOVED*** ***REMOVED***String results = StringEscapeUtils.escapeJava(pkg);
***REMOVED*** dep.setName(pkg);
***REMOVED*** dep.setType("cran");
***REMOVED*** ***REMOVED***
***REMOVED*** if (pkg.startsWith("custom:"))***REMOVED***
***REMOVED*** dep.setName(pkg);
***REMOVED*** dep.setType("custom");
***REMOVED*** ***REMOVED***
***REMOVED*** if (!pkg.startsWith("os:")&&!pkg.startsWith("r:")&&!pkg.startsWith("custom:"))***REMOVED***
***REMOVED*** dep.setName(pkg);
***REMOVED*** dep.setType("os");
***REMOVED*** ***REMOVED***
return out;
***REMOVED***
private static String extractSingleValue(Map<String, List<String>> metadata,
String key) ***REMOVED***
List<String> l = metadata.get(key);
if (l != null && l.size() == 1) ***REMOVED***
return l.get(0);
***REMOVED*** else ***REMOVED***
return null;
***REMOVED***
***REMOVED***
private static List<String> extractMultipleValues(
Map<String, List<String>> metadata, String key) ***REMOVED***
List<String> l = metadata.get(key);
if (l != null) ***REMOVED***
return new Vector<>(l);
***REMOVED*** else ***REMOVED***
return null;
***REMOVED***
***REMOVED***
public static void main(String[] args) ***REMOVED***
AlgorithmPackageParser ap = new AlgorithmPackageParser();
String txt =
"Username: giancarlo.panichi\n"+
"Full Name: Giancarlo Panichi\n"+
"Email: g.panichi@isti.cnr.it\n"+
"Language: R\n"+
"Algorithm Name: RBLACKBOX\n"+
"Class Name: org.gcube.dataanalysis.executor.rscripts.RBlackBox\n"+
"Algorithm Description: RBlackBox\n"+
"Algorithm Category: BLACK_BOX\n"+
"Interpreter Version: 3.2.1\n"+
"Packages:\n"+
"Package Name: DBI\n"+
"Package Name: RPostgreSQL\n"+
"Package Name: raster\n"+
"Package Name: maptools\n"+
"Package Name: sqldf\n"+
"Package Name: RJSONIO\n"+
"Package Name: httr \n"+
"Package Name: data.table";
ap.parseMetadata(txt);
***REMOVED***
***REMOVED***