more logging on the MDStore mongodb client. Forcing UTF_8 encoding on the content

This commit is contained in:
Claudio Atzori 2021-03-03 10:17:16 +01:00
parent e76c4f62c1
commit b73dce3e3a
3 changed files with 22 additions and 4 deletions

View File

@ -21,17 +21,19 @@ import com.mongodb.MongoClientURI;
import com.mongodb.QueryBuilder;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoDatabase;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class MdstoreClient implements Closeable {
private static final Logger log = LoggerFactory.getLogger(MdstoreClient.class);
private final MongoClient client;
private final MongoDatabase db;
private static final String COLL_METADATA = "metadata";
private static final String COLL_METADATA_MANAGER = "metadataManager";
private static final Log log = LogFactory.getLog(MdstoreClient.class);
public MdstoreClient(final String baseUrl, final String dbName) {
this.client = new MongoClient(new MongoClientURI(baseUrl));
this.db = getDb(client, dbName);
@ -40,11 +42,16 @@ public class MdstoreClient implements Closeable {
public MongoCollection<Document> mdStore(final String mdId) {
BasicDBObject query = (BasicDBObject) QueryBuilder.start("mdId").is(mdId).get();
log.info("querying current mdId: {}", query.toJson());
final String currentId = Optional
.ofNullable(getColl(db, COLL_METADATA_MANAGER, true).find(query))
.map(r -> r.first())
.map(d -> d.getString("currentId"))
.orElseThrow(() -> new IllegalArgumentException("cannot find current mdstore id for: " + mdId));
log.info("currentId: {}", currentId);
return getColl(db, currentId, true);
}

View File

@ -8,6 +8,8 @@ import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import org.bson.Document;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.mongodb.client.MongoCollection;
@ -19,6 +21,8 @@ import eu.dnetlib.dhp.common.MdstoreClient;
public class MDStoreCollectorPlugin implements CollectorPlugin {
private static final Logger log = LoggerFactory.getLogger(MDStoreCollectorPlugin.class);
public static final String MONGODB_DBNAME = "mongodb_dbname";
public static final String MDSTORE_ID = "mdstore_id";
@ -30,14 +34,17 @@ public class MDStoreCollectorPlugin implements CollectorPlugin {
.orElseThrow(
() -> new CollectorException(
"missing mongodb baseUrl, expected in eu.dnetlib.dhp.collection.ApiDescriptor.baseUrl"));
log.info("mongoBaseUrl: {}", mongoBaseUrl);
final String dbName = Optional
.ofNullable(api.getParams().get(MONGODB_DBNAME))
.orElseThrow(() -> new CollectorException(String.format("missing parameter '%s'", MONGODB_DBNAME)));
log.info("dbName: {}", dbName);
final String mdId = Optional
.ofNullable(api.getParams().get(MDSTORE_ID))
.orElseThrow(() -> new CollectorException(String.format("missing parameter '%s'", MDSTORE_ID)));
log.info("mdId: {}", mdId);
final MdstoreClient client = new MdstoreClient(mongoBaseUrl, dbName);
final MongoCollection<Document> mdstore = client.mdStore(mdId);

View File

@ -3,9 +3,11 @@ package eu.dnetlib.dhp.transformation.xslt;
import java.io.ByteArrayInputStream;
import java.io.StringWriter;
import java.nio.charset.StandardCharsets;
import javax.xml.transform.stream.StreamSource;
import org.apache.commons.io.IOUtils;
import org.apache.spark.api.java.function.MapFunction;
import eu.dnetlib.dhp.aggregation.common.AggregationCounter;
@ -44,18 +46,20 @@ public class XSLTTransformationFunction implements MapFunction<MetadataRecord, M
Processor processor = new Processor(false);
processor.registerExtensionFunction(cleanFunction);
processor.registerExtensionFunction(new DateCleaner());
final XsltCompiler comp = processor.newXsltCompiler();
XsltExecutable xslt = comp
.compile(new StreamSource(new ByteArrayInputStream(transformationRule.getBytes())));
.compile(new StreamSource(IOUtils.toInputStream(transformationRule, StandardCharsets.UTF_8)));
XdmNode source = processor
.newDocumentBuilder()
.build(new StreamSource(new ByteArrayInputStream(value.getBody().getBytes())));
.build(new StreamSource(IOUtils.toInputStream(value.getBody(), StandardCharsets.UTF_8)));
XsltTransformer trans = xslt.load();
trans.setInitialContextNode(source);
final StringWriter output = new StringWriter();
Serializer out = processor.newSerializer(output);
out.setOutputProperty(Serializer.Property.METHOD, "xml");
out.setOutputProperty(Serializer.Property.INDENT, "yes");
trans.setDestination(out);
trans.transform();
final String xml = output.toString();