forked from antonis.lempesis/dnet-hadoop
More logging on the MDStore MongoDB client. Forcing UTF-8 encoding on the content
This commit is contained in:
parent
e76c4f62c1
commit
b73dce3e3a
|
@ -21,17 +21,19 @@ import com.mongodb.MongoClientURI;
|
|||
import com.mongodb.QueryBuilder;
|
||||
import com.mongodb.client.MongoCollection;
|
||||
import com.mongodb.client.MongoDatabase;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
public class MdstoreClient implements Closeable {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(MdstoreClient.class);
|
||||
|
||||
private final MongoClient client;
|
||||
private final MongoDatabase db;
|
||||
|
||||
private static final String COLL_METADATA = "metadata";
|
||||
private static final String COLL_METADATA_MANAGER = "metadataManager";
|
||||
|
||||
private static final Log log = LogFactory.getLog(MdstoreClient.class);
|
||||
|
||||
public MdstoreClient(final String baseUrl, final String dbName) {
|
||||
this.client = new MongoClient(new MongoClientURI(baseUrl));
|
||||
this.db = getDb(client, dbName);
|
||||
|
@ -40,11 +42,16 @@ public class MdstoreClient implements Closeable {
|
|||
/**
 * Returns the collection whose name is stored in the "currentId" field of the
 * COLL_METADATA_MANAGER entry matching the given mdstore identifier.
 *
 * The lookup query and the resolved currentId are both logged at INFO level.
 *
 * @param mdId value matched against the "mdId" field of the manager collection
 * @return the result of getColl(db, currentId, true)
 * @throws IllegalArgumentException if no matching document is found, or the
 *         matching document has no "currentId" value
 */
public MongoCollection<Document> mdStore(final String mdId) {
|
||||
// Equality filter on the "mdId" field.
BasicDBObject query = (BasicDBObject) QueryBuilder.start("mdId").is(mdId).get();
|
||||
|
||||
log.info("querying current mdId: {}", query.toJson());
|
||||
|
||||
// Each map() step turns a null result (no first document, or no "currentId"
// value) into an empty Optional, so both cases end in the same exception.
// NOTE(review): the meaning of the `true` flag passed to getColl is not visible
// here - presumably it requires the collection to exist; confirm.
final String currentId = Optional
|
||||
.ofNullable(getColl(db, COLL_METADATA_MANAGER, true).find(query))
|
||||
.map(r -> r.first())
|
||||
.map(d -> d.getString("currentId"))
|
||||
.orElseThrow(() -> new IllegalArgumentException("cannot find current mdstore id for: " + mdId));
|
||||
|
||||
log.info("currentId: {}", currentId);
|
||||
|
||||
// The resolved currentId is used directly as the name of the collection to return.
return getColl(db, currentId, true);
|
||||
}
|
||||
|
||||
|
|
|
@ -8,6 +8,8 @@ import java.util.stream.Stream;
|
|||
import java.util.stream.StreamSupport;
|
||||
|
||||
import org.bson.Document;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.mongodb.client.MongoCollection;
|
||||
|
||||
|
@ -19,6 +21,8 @@ import eu.dnetlib.dhp.common.MdstoreClient;
|
|||
|
||||
public class MDStoreCollectorPlugin implements CollectorPlugin {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(MDStoreCollectorPlugin.class);
|
||||
|
||||
public static final String MONGODB_DBNAME = "mongodb_dbname";
|
||||
public static final String MDSTORE_ID = "mdstore_id";
|
||||
|
||||
|
@ -30,14 +34,17 @@ public class MDStoreCollectorPlugin implements CollectorPlugin {
|
|||
.orElseThrow(
|
||||
() -> new CollectorException(
|
||||
"missing mongodb baseUrl, expected in eu.dnetlib.dhp.collection.ApiDescriptor.baseUrl"));
|
||||
log.info("mongoBaseUrl: {}", mongoBaseUrl);
|
||||
|
||||
final String dbName = Optional
|
||||
.ofNullable(api.getParams().get(MONGODB_DBNAME))
|
||||
.orElseThrow(() -> new CollectorException(String.format("missing parameter '%s'", MONGODB_DBNAME)));
|
||||
log.info("dbName: {}", dbName);
|
||||
|
||||
final String mdId = Optional
|
||||
.ofNullable(api.getParams().get(MDSTORE_ID))
|
||||
.orElseThrow(() -> new CollectorException(String.format("missing parameter '%s'", MDSTORE_ID)));
|
||||
log.info("mdId: {}", mdId);
|
||||
|
||||
final MdstoreClient client = new MdstoreClient(mongoBaseUrl, dbName);
|
||||
final MongoCollection<Document> mdstore = client.mdStore(mdId);
|
||||
|
|
|
@ -3,9 +3,11 @@ package eu.dnetlib.dhp.transformation.xslt;
|
|||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.StringWriter;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
import javax.xml.transform.stream.StreamSource;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.spark.api.java.function.MapFunction;
|
||||
|
||||
import eu.dnetlib.dhp.aggregation.common.AggregationCounter;
|
||||
|
@ -44,18 +46,20 @@ public class XSLTTransformationFunction implements MapFunction<MetadataRecord, M
|
|||
Processor processor = new Processor(false);
|
||||
processor.registerExtensionFunction(cleanFunction);
|
||||
processor.registerExtensionFunction(new DateCleaner());
|
||||
|
||||
final XsltCompiler comp = processor.newXsltCompiler();
|
||||
XsltExecutable xslt = comp
|
||||
.compile(new StreamSource(new ByteArrayInputStream(transformationRule.getBytes())));
|
||||
.compile(new StreamSource(IOUtils.toInputStream(transformationRule, StandardCharsets.UTF_8)));
|
||||
XdmNode source = processor
|
||||
.newDocumentBuilder()
|
||||
.build(new StreamSource(new ByteArrayInputStream(value.getBody().getBytes())));
|
||||
.build(new StreamSource(IOUtils.toInputStream(value.getBody(), StandardCharsets.UTF_8)));
|
||||
XsltTransformer trans = xslt.load();
|
||||
trans.setInitialContextNode(source);
|
||||
final StringWriter output = new StringWriter();
|
||||
Serializer out = processor.newSerializer(output);
|
||||
out.setOutputProperty(Serializer.Property.METHOD, "xml");
|
||||
out.setOutputProperty(Serializer.Property.INDENT, "yes");
|
||||
|
||||
trans.setDestination(out);
|
||||
trans.transform();
|
||||
final String xml = output.toString();
|
||||
|
|
Loading…
Reference in New Issue