From b73dce3e3a1a97aa899e1ef2b8a92067026d2508 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Wed, 3 Mar 2021 10:17:16 +0100 Subject: [PATCH] more logging on the MDStore mongodb client. Forcing UTF_8 encoding on the content --- .../java/eu/dnetlib/dhp/common/MdstoreClient.java | 11 +++++++++-- .../plugin/mongodb/MDStoreCollectorPlugin.java | 7 +++++++ .../xslt/XSLTTransformationFunction.java | 8 ++++++-- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/MdstoreClient.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/MdstoreClient.java index d29498306..38837b557 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/MdstoreClient.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/MdstoreClient.java @@ -21,17 +21,19 @@ import com.mongodb.MongoClientURI; import com.mongodb.QueryBuilder; import com.mongodb.client.MongoCollection; import com.mongodb.client.MongoDatabase; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class MdstoreClient implements Closeable { + private static final Logger log = LoggerFactory.getLogger(MdstoreClient.class); + private final MongoClient client; private final MongoDatabase db; private static final String COLL_METADATA = "metadata"; private static final String COLL_METADATA_MANAGER = "metadataManager"; - private static final Log log = LogFactory.getLog(MdstoreClient.class); - public MdstoreClient(final String baseUrl, final String dbName) { this.client = new MongoClient(new MongoClientURI(baseUrl)); this.db = getDb(client, dbName); @@ -40,11 +42,16 @@ public class MdstoreClient implements Closeable { public MongoCollection mdStore(final String mdId) { BasicDBObject query = (BasicDBObject) QueryBuilder.start("mdId").is(mdId).get(); + log.info("querying current mdId: {}", query.toJson()); + final String currentId = Optional .ofNullable(getColl(db, COLL_METADATA_MANAGER, true).find(query)) .map(r -> r.first()) .map(d -> d.getString("currentId")) .orElseThrow(() -> new IllegalArgumentException("cannot find current mdstore id for: " + mdId)); + + log.info("currentId: {}", currentId); + return getColl(db, currentId, true); } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MDStoreCollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MDStoreCollectorPlugin.java index 77e899cc9..549c59720 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MDStoreCollectorPlugin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/mongodb/MDStoreCollectorPlugin.java @@ -8,6 +8,8 @@ import java.util.stream.Stream; import java.util.stream.StreamSupport; import org.bson.Document; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.mongodb.client.MongoCollection; @@ -19,6 +21,8 @@ import eu.dnetlib.dhp.common.MdstoreClient; public class MDStoreCollectorPlugin implements CollectorPlugin { + private static final Logger log = LoggerFactory.getLogger(MDStoreCollectorPlugin.class); + public static final String MONGODB_DBNAME = "mongodb_dbname"; public static final String MDSTORE_ID = "mdstore_id"; @@ -30,14 +34,17 @@ public class MDStoreCollectorPlugin implements CollectorPlugin { .orElseThrow( () -> new CollectorException( "missing mongodb baseUrl, expected in eu.dnetlib.dhp.collection.ApiDescriptor.baseUrl")); + log.info("mongoBaseUrl: {}", mongoBaseUrl); final String dbName = Optional .ofNullable(api.getParams().get(MONGODB_DBNAME)) .orElseThrow(() -> new CollectorException(String.format("missing parameter '%s'", MONGODB_DBNAME))); + log.info("dbName: {}", dbName); final String mdId = Optional .ofNullable(api.getParams().get(MDSTORE_ID)) .orElseThrow(() -> new CollectorException(String.format("missing parameter '%s'", MDSTORE_ID))); + log.info("mdId: {}", mdId); final MdstoreClient client = new MdstoreClient(mongoBaseUrl, dbName); final MongoCollection mdstore = client.mdStore(mdId); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java index d9b38e572..430fbcf95 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/transformation/xslt/XSLTTransformationFunction.java @@ -3,9 +3,11 @@ package eu.dnetlib.dhp.transformation.xslt; import java.io.ByteArrayInputStream; import java.io.StringWriter; +import java.nio.charset.StandardCharsets; import javax.xml.transform.stream.StreamSource; +import org.apache.commons.io.IOUtils; import org.apache.spark.api.java.function.MapFunction; import eu.dnetlib.dhp.aggregation.common.AggregationCounter; @@ -44,18 +46,20 @@ public class XSLTTransformationFunction implements MapFunction