package org.gcube.data.spd.executor.jobs.darwincore;

import static org.gcube.data.streams.dsl.Streams.convert;
import static org.gcube.data.streams.dsl.Streams.pipe;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Iterator;
import java.util.Map;
import java.util.UUID;

import org.gcube.contentmanagement.blobstorage.service.IClient;
import org.gcube.contentmanager.storageclient.wrapper.AccessType;
import org.gcube.contentmanager.storageclient.wrapper.StorageClient;
import org.gcube.data.spd.Constants;
import org.gcube.data.spd.executor.jobs.URLJob;
import org.gcube.data.spd.executor.jobs.csv.OccurrenceReaderByKey;
import org.gcube.data.spd.model.exceptions.StreamException;
import org.gcube.data.spd.model.products.OccurrencePoint;
import org.gcube.data.spd.model.products.TaxonomyItem;
import org.gcube.data.spd.model.service.types.JobStatus;
import org.gcube.data.spd.plugin.fwk.AbstractPlugin;
import org.gcube.data.spd.plugin.fwk.readers.LocalReader;
import org.gcube.data.spd.plugin.fwk.writers.rswrapper.LocalWrapper;
import org.gcube.data.spd.utils.DynamicMap;
import org.gcube.data.spd.utils.Utils;
import org.gcube.data.streams.Stream;
import org.gcube.data.streams.exceptions.StreamSkipSignal;
import org.gcube.data.streams.exceptions.StreamStopSignal;
import org.gcube.data.streams.generators.Generator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Asynchronous job that reads {@link OccurrencePoint}s resolved from the ids
 * registered under this job's entry in {@link DynamicMap}, serializes them as a
 * DarwinCore file and uploads the result to the gCube storage service.
 * <p>
 * On success {@link #getResultURL()} returns the public URL of the uploaded
 * file; if any per-record errors were collected while streaming,
 * {@link #getErrorURL()} returns the URL of an uploaded error report.
 */
public class DarwinCoreJob implements URLJob {

	private static final long serialVersionUID = 1L;

	private static Logger logger = LoggerFactory.getLogger(DarwinCoreJob.class);

	// number of occurrence records successfully written to the output file
	private int completedEntries = 0;

	// set at the start and end of run(); null until the job has run
	private Calendar endDate, startDate;

	// public URL of the uploaded DarwinCore file; null until the job completes
	private String resultURL = null;

	// public URL of the uploaded error report; null if no errors occurred
	private String errorFileURL = null;

	private JobStatus status;

	// job identifier, also used as the DynamicMap key and remote file name
	private String id;

	// NOTE(review): never assigned anywhere in this file — getMapSubJobs()
	// always returns null as far as this source shows; verify against callers.
	private Map mapSubJobs;

	// plugin registry passed to OccurrenceReaderByKey to resolve occurrence ids
	private Map plugins;

	/**
	 * Creates a new pending job with a freshly generated random id.
	 *
	 * @param plugins plugin registry used to resolve occurrence records
	 */
	public DarwinCoreJob(Map plugins) {
		this.id = UUID.randomUUID().toString();
		this.status = JobStatus.PENDING;
		this.plugins = plugins;
	}

	/**
	 * Executes the job: streams occurrences, writes the DarwinCore file,
	 * uploads it (and an error report, if any) to storage, and updates the
	 * job status. Temporary local files are always deleted and the job's
	 * DynamicMap entry removed, regardless of outcome.
	 */
	@Override
	public void run() {
		File darwincoreFile = null;
		File errorFile = null;
		try {
			this.startDate = Calendar.getInstance();
			this.status = JobStatus.RUNNING;

			LocalWrapper localWrapper = new LocalWrapper(2000);
			localWrapper.forceOpen();
			// ids to export were registered under this job's id in DynamicMap
			Stream ids = convert(DynamicMap.get(this.id));
			OccurrenceReaderByKey occurrenceReader = new OccurrenceReaderByKey(localWrapper, ids, plugins);
			// producer thread: resolves ids into occurrence points through localWrapper
			new Thread(occurrenceReader).start();
			LocalReader ocReader = new LocalReader(localWrapper);

			IClient client = new StorageClient(Constants.SERVICE_CLASS, Constants.SERVICE_NAME,
					"DarwinCore", AccessType.SHARED).getClient();

			darwincoreFile = getDarwinCoreFile(ocReader);

			String resultPath = "/darwincore/" + this.id.replace("-", "");
			client.put(true).LFile(darwincoreFile.getAbsolutePath()).RFile(resultPath);
			this.resultURL = client.getUrl().RFile(resultPath);

			// collect per-record streaming errors (if any) into a report file
			errorFile = Utils.createErrorFile(
					pipe(convert(localWrapper.getErrors())).through(new Generator() {
						@Override
						public String yield(StreamException element) throws StreamSkipSignal, StreamStopSignal {
							return element.getRepositoryName() + " " + element.getIdentifier();
						}
					}));

			if (errorFile != null) {
				String errorFilePath = "/darwincore/" + this.id.replace("-", "") + "-ERRORS.txt";
				// BUGFIX: upload the error report itself; the original code
				// re-uploaded darwincoreFile here by copy-paste mistake.
				client.put(true).LFile(errorFile.getAbsolutePath()).RFile(errorFilePath);
				this.errorFileURL = client.getUrl().RFile(errorFilePath);
			}

			logger.trace("filePath is " + darwincoreFile.getAbsolutePath());
			this.status = JobStatus.COMPLETED;
		} catch (Exception e) {
			logger.error("error executing DWCAJob", e);
			this.status = JobStatus.FAILED;
			return;
		} finally {
			// local temp files were uploaded (or the job failed): clean up
			if (darwincoreFile != null)
				darwincoreFile.delete();
			if (errorFile != null)
				errorFile.delete();
			this.endDate = Calendar.getInstance();
			DynamicMap.remove(this.id);
		}
	}

	public JobStatus getStatus() {
		return status;
	}

	public void setStatus(JobStatus status) {
		this.status = status;
	}

	public Calendar getEndDate() {
		return endDate;
	}

	public Calendar getStartDate() {
		return startDate;
	}

	public String getId() {
		return id;
	}

	public Map getMapSubJobs() {
		return mapSubJobs;
	}

	public String getResultURL() {
		return resultURL;
	}

	@Override
	public String getErrorURL() {
		return this.errorFileURL;
	}

	/** Always accepts the input; no validation is performed for this job type. */
	@Override
	public boolean validateInput(String input) {
		return true;
	}

	@Override
	public int getCompletedEntries() {
		return completedEntries;
	}

	/**
	 * Writes every occurrence produced by {@code reader} to a temporary
	 * DarwinCore file, incrementing {@link #completedEntries} per record.
	 *
	 * @param reader iterator of {@link OccurrencePoint}s (presumably
	 *               never-null elements — TODO confirm with producer)
	 * @return the temporary file containing the serialized records
	 * @throws Exception if writing fails; the error is logged and rethrown
	 */
	private File getDarwinCoreFile(Iterator reader) throws Exception {
		DateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");
		// FIX: the original pre-initialized writer with a throwaway
		// OutputStreamWriter(ByteArrayOutputStream) that was never closed;
		// start at null and close only what was actually opened.
		FileWriter writer = null;
		try {
			// FIX: suffix must include the dot — createTempFile does not add one
			File returnFile = File.createTempFile("darwinCore", ".xml");
			writer = new FileWriter(returnFile);
			writer.append("");
			writer.append("");
			while (reader.hasNext()) {
				writer.append("");
				writer.append("en");
				OccurrencePoint occurrence = (OccurrencePoint) reader.next();
				if (occurrence.getModified() != null)
					writer.append("" + df.format(occurrence.getModified().getTime()) + "");
				if (occurrence.getBasisOfRecord() != null)
					writer.append("" + occurrence.getBasisOfRecord().name() + "");
				if (occurrence.getScientificNameAuthorship() != null)
					writer.append("" + occurrence.getScientificNameAuthorship() + "");
				if (occurrence.getInstitutionCode() != null)
					writer.append("" + occurrence.getInstitutionCode() + "");
				if (occurrence.getCollectionCode() != null)
					writer.append("" + occurrence.getCollectionCode() + "");
				if (occurrence.getCatalogueNumber() != null)
					writer.append("" + occurrence.getCatalogueNumber() + "");
				if (occurrence.getIdentifiedBy() != null)
					writer.append("" + occurrence.getIdentifiedBy() + "");
				if (occurrence.getRecordedBy() != null)
					writer.append("" + occurrence.getRecordedBy() + "");
				if (occurrence.getScientificName() != null)
					writer.append("" + occurrence.getScientificName() + "");
				if (occurrence.getKingdom() != null)
					writer.append("" + occurrence.getKingdom() + "");
				if (occurrence.getFamily() != null)
					writer.append("" + occurrence.getFamily() + "");
				if (occurrence.getLocality() != null)
					writer.append("" + occurrence.getLocality() + "");
				if (occurrence.getEventDate() != null) {
					writer.append("" + df.format(occurrence.getEventDate().getTime()) + "");
					writer.append("" + occurrence.getEventDate().get(Calendar.YEAR) + "");
				}
				// NOTE(review): 0.0 is treated as "unset" for these numeric
				// fields, so a legitimate 0.0 coordinate/depth is skipped —
				// preserved as-is; confirm intent with the data model.
				if (occurrence.getDecimalLatitude() != 0.0)
					writer.append("" + occurrence.getDecimalLatitude() + "");
				if (occurrence.getDecimalLongitude() != 0.0)
					writer.append("" + occurrence.getDecimalLongitude() + "");
				if (occurrence.getCoordinateUncertaintyInMeters() != null)
					writer.append("" + occurrence.getCoordinateUncertaintyInMeters() + "");
				if (occurrence.getMaxDepth() != 0.0)
					writer.append("" + occurrence.getMaxDepth() + "");
				if (occurrence.getMinDepth() != 0.0)
					writer.append("" + occurrence.getMinDepth() + "");
				writer.append("");
				completedEntries++;
			}
			writer.append("");
			writer.flush();
			return returnFile;
		} catch (Exception e) {
			logger.error("error writing occurrences as darwin core", e);
			throw e;
		} finally {
			if (writer != null) {
				try {
					writer.close();
				} catch (IOException e) {
					logger.warn("error closing the output stream", e);
				}
			}
		}
	}
}