dnet-core/dnet-data-services/src/main/java/eu/dnetlib/data/objectstore/filesystem/FileSystemObjectStore.java

384 lines
12 KiB
Java

package eu.dnetlib.data.objectstore.filesystem;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.regex.Pattern;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.gson.Gson;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.model.Filters;
import com.mongodb.client.result.DeleteResult;
import eu.dnetlib.data.objectstore.modular.ObjectStoreRecord;
import eu.dnetlib.data.objectstore.modular.connector.ObjectStore;
import eu.dnetlib.data.objectstore.rmi.MetadataObjectRecord;
import eu.dnetlib.data.objectstore.rmi.ObjectStoreFile;
import eu.dnetlib.data.objectstore.rmi.ObjectStoreFileNotFoundException;
import eu.dnetlib.data.objectstore.rmi.ObjectStoreServiceException;
import eu.dnetlib.enabling.resultset.ResultSetListener;
import eu.dnetlib.miscutils.collections.Pair;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.bson.conversions.Bson;
/**
 * File-system backed implementation of {@link ObjectStore}.
 * <p>
 * The content of each object is written to a file whose location is computed by
 * {@code FileSystemUtility.objectStoreFilePath(basePath, id, objectId)}, and one metadata document per stored object (id, mime type, md5, size,
 * timestamp, file system path and URI) is inserted into the MongoDB collection handed to the constructor.
 * </p>
 *
 * @author sandro
 */
public class FileSystemObjectStore implements ObjectStore {

    /** Name of the metadata field holding the URI of a stored object. */
    private static final String URI_FIELD = "uri";

    /** Name of the metadata field holding the absolute file system path of a stored object. */
    private static final String FS_PATH_FIELD = "fsPath";

    /** The Constant log. */
    private static final Log log = LogFactory.getLog(FileSystemObjectStore.class); // NOPMD by marko on 11/24/08 5:02 PM

    /** The id. */
    private final String id;

    /** The interpretation. */
    private final String interpretation;

    /** The base path. */
    private final String basePath;

    /** The base uri. */
    private final String baseURI;

    /** The mongo metadata. */
    private final MongoCollection<DBObject> mongoMetadata;

    /**
     * Instantiates a new file system object store.
     *
     * @param identifier
     *            the identifier
     * @param interpretation
     *            the interpretation
     * @param basePath
     *            the base path
     * @param mongoMetadata
     *            the mongo metadata
     * @param baseURI
     *            the base uri
     */
    public FileSystemObjectStore(final String identifier, final String interpretation, final String basePath, final MongoCollection<DBObject> mongoMetadata,
            final String baseURI) {
        this.id = identifier;
        this.basePath = basePath;
        this.interpretation = interpretation;
        this.mongoMetadata = mongoMetadata;
        this.baseURI = baseURI;
    }

    /**
     * {@inheritDoc}
     *
     * @see eu.dnetlib.data.objectstore.modular.connector.ObjectStore#getId()
     */
    @Override
    public String getId() {
        return this.id;
    }

    /**
     * {@inheritDoc}
     *
     * @see eu.dnetlib.data.objectstore.modular.connector.ObjectStore#getInterpretation()
     */
    @Override
    public String getInterpretation() {
        return this.interpretation;
    }

    /**
     * {@inheritDoc}
     * <p>
     * Returns 0 for a {@code null} iterable. Records that could not be stored (null record, blank identifier, file already present, or any error
     * while storing) are counted and reported with a single warning. The {@code incremental} flag is not used by this implementation.
     * </p>
     *
     * @throws ObjectStoreServiceException
     *             if the folder {@code basePath/id} does not exist
     * @see eu.dnetlib.data.objectstore.modular.connector.ObjectStore#feed(java.lang.Iterable, boolean)
     */
    @Override
    public int feed(final Iterable<ObjectStoreRecord> records, final boolean incremental) throws ObjectStoreServiceException {
        if (records == null) {
            return 0;
        }
        final Path baseDirPath = FileSystems.getDefault().getPath(getBasePath()).resolve(getId());
        if (!Files.exists(baseDirPath)) {
            throw new ObjectStoreServiceException("Error can't feed objects because the folder " + baseDirPath + " does not exist");
        }
        int addedCounter = 0;
        int nulls = 0;
        for (final ObjectStoreRecord record : records) {
            if (StringUtils.isNotBlank(feedObject(record))) {
                addedCounter++;
            } else {
                nulls++;
            }
        }
        if (nulls > 0) {
            log.warn(String.format("Found %s null records", nulls));
        }
        return addedCounter;
    }

    /**
     * {@inheritDoc}
     * <p>
     * This method handles the case of web crawl files and other cases when the metadata in mdstores are also the objects to put into the
     * objectstores: each metadata record is wrapped into an {@link ObjectStoreRecord} whose content is the record text encoded as UTF-8, and the
     * result is passed to {@link #feed(Iterable, boolean)}.
     * </p>
     * <p>
     * Returns 0 for a {@code null} iterable. A {@code null} element, or an element without record text, is turned into a {@code null} record so
     * that {@code feed} counts it as skipped instead of aborting the whole feed.
     * </p>
     *
     * @see eu.dnetlib.data.objectstore.modular.connector.ObjectStore#feedMetadataRecord(java.lang.Iterable, boolean)
     */
    @Override
    public int feedMetadataRecord(final Iterable<MetadataObjectRecord> records, final boolean incremental) throws ObjectStoreServiceException {
        if (records == null) {
            return 0;
        }
        final Iterable<ObjectStoreRecord> it = Iterables.transform(records, mor -> {
            if (mor == null || mor.getRecord() == null) {
                return null;
            }
            final ObjectStoreFile fileMetadata = new ObjectStoreFile();
            fileMetadata.setObjectID(mor.getId());
            fileMetadata.setMimeType(mor.getMime());

            final ObjectStoreRecord r = new ObjectStoreRecord();
            // Explicit charset: the platform default must not decide how the record is serialized.
            r.setInputStream(new ByteArrayInputStream(mor.getRecord().getBytes(StandardCharsets.UTF_8)));
            r.setFileMetadata(fileMetadata);
            return r;
        });
        return feed(it, incremental);
    }

    /**
     * {@inheritDoc}
     *
     * @see eu.dnetlib.data.objectstore.modular.connector.ObjectStore#feedObjectRecord(eu.dnetlib.data.objectstore.modular.ObjectStoreRecord)
     */
    @Override
    public String feedObjectRecord(final ObjectStoreRecord record) throws ObjectStoreServiceException {
        return feedObject(record);
    }

    /**
     * Stores a single record, unless a file for its identifier is already present.
     * <p>
     * The record input stream (when there is one) is always closed before returning, including when the record is skipped.
     * </p>
     *
     * @param record
     *            the record to store, may be {@code null}
     * @return the URL of the stored object, or {@code null} when the record is null, has no metadata or a blank identifier, is already present on
     *         disk, or could not be stored
     */
    private String feedObject(final ObjectStoreRecord record) {
        if (record == null) {
            return null;
        }
        try {
            final ObjectStoreFile fileMetadata = record.getFileMetadata();
            if (fileMetadata == null) {
                return null;
            }
            final String objectIdentifier = fileMetadata.getObjectID();
            if (StringUtils.isBlank(objectIdentifier)) {
                return null;
            }
            final Path objResolvedPath = FileSystemUtility.objectStoreFilePath(basePath, id, objectIdentifier);
            if (!Files.notExists(objResolvedPath)) {
                log.debug("The File in the path" + objResolvedPath.getParent() + "exists ");
                return null;
            }
            return storeNewObject(record, fileMetadata, objResolvedPath);
        } finally {
            closeInputStream(record);
        }
    }

    /**
     * Writes the record content to {@code objResolvedPath} and inserts the matching metadata document.
     * <p>
     * The metadata document is inserted only when an md5 was computed, i.e. when the record carried an input stream. On any failure the error is
     * logged, the file possibly left at {@code objResolvedPath} is removed (otherwise a later feed of the same identifier would find the file and
     * skip it although no metadata exists), and {@code null} is returned.
     * </p>
     *
     * @param record
     *            the record being stored
     * @param fileMetadata
     *            the non-null metadata of the record
     * @param objResolvedPath
     *            the target file, known not to exist yet
     * @return the URL of the object, or {@code null} on failure
     */
    private String storeNewObject(final ObjectStoreRecord record, final ObjectStoreFile fileMetadata, final Path objResolvedPath) {
        try {
            log.debug("Creation of folder " + objResolvedPath.getParent());
            Files.createDirectories(objResolvedPath.getParent());
            log.debug("Folder " + objResolvedPath.getParent() + " created");

            String md5Sum = null;
            Long size = 0L;
            if (record.getInputStream() != null) {
                final Pair<String, Long> infos = FileSystemUtility.saveAndGenerateMD5(record.getInputStream(), objResolvedPath);
                md5Sum = infos.getKey();
                size = infos.getValue();
            }
            final String url = ModularObjectStoreRESTService.retrieveURL(getBaseURI(), getBasePath(), getId(), fileMetadata.getObjectID());
            if (StringUtils.isNotBlank(md5Sum)) {
                // Kept as a double: this is the type the timestamp has always been stored with.
                final double timestamp = System.currentTimeMillis();
                final BasicDBObject metadata = new BasicDBObject();
                metadata.put("id", fileMetadata.getObjectID());
                metadata.put("mime", fileMetadata.getMimeType());
                metadata.put("originalObject", fileMetadata.toJSON());
                metadata.put("timestamp", timestamp);
                metadata.put("md5Sum", md5Sum);
                metadata.put("size", size);
                metadata.put(FS_PATH_FIELD, objResolvedPath.toAbsolutePath().toString());
                metadata.put(URI_FIELD, url);
                log.debug("saving metadata object to the collection: " + metadata.toString());
                mongoMetadata.insertOne(metadata);
            }
            return url;
        } catch (Exception e) {
            log.error("Something bad happen on inserting Record", e);
            removeOrphanFile(objResolvedPath);
            log.error("Record: " + new Gson().toJson(record.getFileMetadata()));
            return null;
        }
    }

    /**
     * Best-effort removal of a file left behind by a failed store operation; a failure to delete is only logged.
     *
     * @param path
     *            the file to remove
     */
    private void removeOrphanFile(final Path path) {
        try {
            Files.deleteIfExists(path);
        } catch (IOException e) {
            log.warn("Unable to remove the file " + path + " left by a failed insertion", e);
        }
    }

    /**
     * Closes the input stream of the record, if any; a failure to close is only logged.
     *
     * @param record
     *            the non-null record whose stream must be released
     */
    private void closeInputStream(final ObjectStoreRecord record) {
        if (record.getInputStream() != null) {
            try {
                record.getInputStream().close();
            } catch (Exception e) {
                log.error("Error on close inputStream", e);
            }
        }
    }

    /**
     * {@inheritDoc}
     * <p>
     * Returns a listener configured with this store's base URI, base path, id and metadata collection, and with the given date range.
     * </p>
     *
     * @see eu.dnetlib.data.objectstore.modular.connector.ObjectStore#deliver(java.lang.Long, java.lang.Long)
     */
    @Override
    public ResultSetListener deliver(final Long from, final Long until) throws ObjectStoreServiceException {
        final FileSystemObjectStoreResultSetListener resultSet = new FileSystemObjectStoreResultSetListener();
        resultSet.setBaseURI(getBaseURI());
        resultSet.setMongoCollection(mongoMetadata);
        resultSet.setObjectStoreID(getId());
        resultSet.setFromDate(from);
        resultSet.setUntilDate(until);
        resultSet.setBasePath(getBasePath());
        return resultSet;
    }

    /**
     * {@inheritDoc}
     * <p>
     * Returns a listener configured with this store's base URI, base path, id and metadata collection, and with a copy of the given identifiers.
     * </p>
     *
     * @see eu.dnetlib.data.objectstore.modular.connector.ObjectStore#deliverIds(java.lang.Iterable)
     */
    @Override
    public ResultSetListener deliverIds(final Iterable<String> ids) throws ObjectStoreServiceException {
        final FileSystemObjectStoreResultSetListener resultSet = new FileSystemObjectStoreResultSetListener();
        resultSet.setBaseURI(getBaseURI());
        resultSet.setMongoCollection(mongoMetadata);
        resultSet.setObjectStoreID(getId());
        resultSet.setRecords(Lists.newArrayList(ids));
        resultSet.setBasePath(basePath);
        return resultSet;
    }

    /**
     * {@inheritDoc}
     *
     * @throws ObjectStoreServiceException
     *             an {@link ObjectStoreFileNotFoundException} if no metadata is found for the identifier or its path field is missing or blank
     * @see eu.dnetlib.data.objectstore.modular.connector.ObjectStore#deliverObject(java.lang.String)
     */
    @Override
    public ObjectStoreFile deliverObject(final String objectId) throws ObjectStoreServiceException {
        final Bson query = Filters.eq("id", objectId);
        final DBObject resultQuery = mongoMetadata.find(query).first();
        // Called for its validation only: throws when the metadata is unusable.
        checkAndGetFsPathField(resultQuery, objectId);
        return ObjectStoreFileUtility.build(resultQuery, getBaseURI(), getId(), basePath);
    }

    /**
     * Validates a metadata document and extracts its file system path.
     *
     * @param resultQuery
     *            the metadata document, may be {@code null}
     * @param objectId
     *            the identifier that was looked up, used in the error messages
     * @return the non-blank value of the path field
     * @throws ObjectStoreServiceException
     *             an {@link ObjectStoreFileNotFoundException} if the document is null, or its path field is missing or blank
     */
    private String checkAndGetFsPathField(final DBObject resultQuery, final String objectId) throws ObjectStoreServiceException {
        if (resultQuery == null || !resultQuery.containsField(FS_PATH_FIELD)) {
            throw new ObjectStoreFileNotFoundException("Object with identifier :" + objectId + " not found or missing " + FS_PATH_FIELD + " field");
        }
        final String pathStr = (String) resultQuery.get(FS_PATH_FIELD);
        if (StringUtils.isBlank(pathStr)) {
            throw new ObjectStoreFileNotFoundException("Object with identifier :" + objectId + " with blank " + FS_PATH_FIELD);
        }
        return pathStr;
    }

    /**
     * {@inheritDoc}
     * <p>
     * The size is the number of metadata documents; counts above {@link Integer#MAX_VALUE} are reported as {@link Integer#MAX_VALUE} instead of
     * wrapping around.
     * </p>
     *
     * @see eu.dnetlib.data.objectstore.modular.connector.ObjectStore#getSize()
     */
    @Override
    public int getSize() throws ObjectStoreServiceException {
        final long count = mongoMetadata.count();
        return count > Integer.MAX_VALUE ? Integer.MAX_VALUE : (int) count;
    }

    /**
     * {@inheritDoc}
     * <p>
     * The file is deleted first, then the metadata document. When the file is missing an exception is thrown and the metadata is left in place.
     * </p>
     *
     * @throws ObjectStoreServiceException
     *             if the metadata or the file cannot be found, or the file cannot be deleted
     * @see eu.dnetlib.data.objectstore.modular.connector.ObjectStore#deleteObject(java.lang.String)
     */
    @Override
    public void deleteObject(final String objectId) throws ObjectStoreServiceException {
        final Bson query = Filters.eq("id", objectId);
        final DBObject response = mongoMetadata.find(query).first();
        final String pathStr = checkAndGetFsPathField(response, objectId);
        final Path path = FileSystems.getDefault().getPath(pathStr);
        if (Files.notExists(path)) {
            throw new ObjectStoreFileNotFoundException("Object with identifier :" + objectId + " not found in the assigned path " + path);
        }
        try {
            Files.delete(path);
        } catch (IOException e) {
            throw new ObjectStoreServiceException("An error occurs on delete file ", e);
        }
        mongoMetadata.deleteOne(query);
    }

    /**
     * {@inheritDoc}
     * <p>
     * Returns the URI stored in the metadata of the record, or {@code null} when the record is unknown or has no URI field.
     * </p>
     *
     * @see eu.dnetlib.data.objectstore.modular.connector.ObjectStore#getObject(java.lang.String)
     */
    @Override
    public String getObject(final String recordId) throws ObjectStoreServiceException {
        final Bson query = Filters.eq("id", recordId);
        final DBObject response = mongoMetadata.find(query).first();
        if (response == null || !response.containsField(URI_FIELD)) {
            return null;
        }
        return (String) response.get(URI_FIELD);
    }

    /**
     * {@inheritDoc}
     * <p>
     * {@code startId} is treated as a literal prefix: it is quoted, so that regular expression metacharacters in identifiers are not interpreted,
     * and anchored to the beginning of the id. A {@code null} prefix matches nothing.
     * </p>
     *
     * @see eu.dnetlib.data.objectstore.modular.connector.ObjectStore#existIDStartsWith(java.lang.String)
     */
    @Override
    public boolean existIDStartsWith(final String startId) throws ObjectStoreServiceException {
        if (startId == null) {
            return false;
        }
        final Bson query = Filters.regex("id", Pattern.compile("^" + Pattern.quote(startId)));
        return mongoMetadata.count(query) > 0;
    }

    /**
     * Removes every object of this store: the folder {@code basePath/id} is deleted recursively and recreated empty, then all the metadata
     * documents are deleted.
     *
     * @return always {@code true}
     * @throws ObjectStoreServiceException
     *             if the base path is null, or the folder cannot be deleted or recreated
     */
    @Override
    public boolean dropContent() throws ObjectStoreServiceException {
        if (getBasePath() == null) {
            throw new ObjectStoreServiceException("Error on dropping object store base_path required");
        }
        final Path baseDirPath = FileSystems.getDefault().getPath(getBasePath()).resolve(getId());
        try {
            FileSystemUtility.deleteFolderRecursive(baseDirPath);
        } catch (IOException e) {
            throw new ObjectStoreServiceException("Error on dropping store ", e);
        }
        log.info("Deleted folder" + baseDirPath.toString());
        if (!Files.exists(baseDirPath)) {
            log.info("Recreating folder " + baseDirPath);
            try {
                Files.createDirectory(baseDirPath);
            } catch (IOException e) {
                throw new ObjectStoreServiceException("Error on dropping store ", e);
            }
        }
        final DeleteResult deleteResult = this.mongoMetadata.deleteMany(new BasicDBObject());
        log.info("Dropped content for object store " + id + ". " + deleteResult.getDeletedCount() + " object(s) deleted.");
        return true;
    }

    /**
     * Describes the store by its id, interpretation, base path and base URI.
     *
     * @return a human readable description of this store
     */
    @Override
    public String toString() {
        return "FileSystemObjectStore{" +
                "id='" + getId() + '\'' +
                ", interpretation='" + getInterpretation() + '\'' +
                ", basePath='" + getBasePath() + '\'' +
                ", baseURI='" + getBaseURI() + '\'' +
                '}';
    }

    /**
     * Gets the base uri.
     *
     * @return the baseURI
     */
    public String getBaseURI() {
        return baseURI;
    }

    /**
     * Gets the base path.
     *
     * @return the basePath
     */
    public String getBasePath() {
        return basePath;
    }
}