forked from antonis.lempesis/dnet-hadoop
More logging in the MDStore MongoDB client and collector plugin. Force UTF-8 encoding when reading the XSLT transformation rule and the record body.
This commit is contained in:
parent
e76c4f62c1
commit
b73dce3e3a
|
@ -21,17 +21,19 @@ import com.mongodb.MongoClientURI;
|
||||||
import com.mongodb.QueryBuilder;
|
import com.mongodb.QueryBuilder;
|
||||||
import com.mongodb.client.MongoCollection;
|
import com.mongodb.client.MongoCollection;
|
||||||
import com.mongodb.client.MongoDatabase;
|
import com.mongodb.client.MongoDatabase;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
public class MdstoreClient implements Closeable {
|
public class MdstoreClient implements Closeable {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(MdstoreClient.class);
|
||||||
|
|
||||||
private final MongoClient client;
|
private final MongoClient client;
|
||||||
private final MongoDatabase db;
|
private final MongoDatabase db;
|
||||||
|
|
||||||
private static final String COLL_METADATA = "metadata";
|
private static final String COLL_METADATA = "metadata";
|
||||||
private static final String COLL_METADATA_MANAGER = "metadataManager";
|
private static final String COLL_METADATA_MANAGER = "metadataManager";
|
||||||
|
|
||||||
private static final Log log = LogFactory.getLog(MdstoreClient.class);
|
|
||||||
|
|
||||||
public MdstoreClient(final String baseUrl, final String dbName) {
|
public MdstoreClient(final String baseUrl, final String dbName) {
|
||||||
this.client = new MongoClient(new MongoClientURI(baseUrl));
|
this.client = new MongoClient(new MongoClientURI(baseUrl));
|
||||||
this.db = getDb(client, dbName);
|
this.db = getDb(client, dbName);
|
||||||
|
@ -40,11 +42,16 @@ public class MdstoreClient implements Closeable {
|
||||||
public MongoCollection<Document> mdStore(final String mdId) {
|
public MongoCollection<Document> mdStore(final String mdId) {
|
||||||
BasicDBObject query = (BasicDBObject) QueryBuilder.start("mdId").is(mdId).get();
|
BasicDBObject query = (BasicDBObject) QueryBuilder.start("mdId").is(mdId).get();
|
||||||
|
|
||||||
|
log.info("querying current mdId: {}", query.toJson());
|
||||||
|
|
||||||
final String currentId = Optional
|
final String currentId = Optional
|
||||||
.ofNullable(getColl(db, COLL_METADATA_MANAGER, true).find(query))
|
.ofNullable(getColl(db, COLL_METADATA_MANAGER, true).find(query))
|
||||||
.map(r -> r.first())
|
.map(r -> r.first())
|
||||||
.map(d -> d.getString("currentId"))
|
.map(d -> d.getString("currentId"))
|
||||||
.orElseThrow(() -> new IllegalArgumentException("cannot find current mdstore id for: " + mdId));
|
.orElseThrow(() -> new IllegalArgumentException("cannot find current mdstore id for: " + mdId));
|
||||||
|
|
||||||
|
log.info("currentId: {}", currentId);
|
||||||
|
|
||||||
return getColl(db, currentId, true);
|
return getColl(db, currentId, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -8,6 +8,8 @@ import java.util.stream.Stream;
|
||||||
import java.util.stream.StreamSupport;
|
import java.util.stream.StreamSupport;
|
||||||
|
|
||||||
import org.bson.Document;
|
import org.bson.Document;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import com.mongodb.client.MongoCollection;
|
import com.mongodb.client.MongoCollection;
|
||||||
|
|
||||||
|
@ -19,6 +21,8 @@ import eu.dnetlib.dhp.common.MdstoreClient;
|
||||||
|
|
||||||
public class MDStoreCollectorPlugin implements CollectorPlugin {
|
public class MDStoreCollectorPlugin implements CollectorPlugin {
|
||||||
|
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(MDStoreCollectorPlugin.class);
|
||||||
|
|
||||||
public static final String MONGODB_DBNAME = "mongodb_dbname";
|
public static final String MONGODB_DBNAME = "mongodb_dbname";
|
||||||
public static final String MDSTORE_ID = "mdstore_id";
|
public static final String MDSTORE_ID = "mdstore_id";
|
||||||
|
|
||||||
|
@ -30,14 +34,17 @@ public class MDStoreCollectorPlugin implements CollectorPlugin {
|
||||||
.orElseThrow(
|
.orElseThrow(
|
||||||
() -> new CollectorException(
|
() -> new CollectorException(
|
||||||
"missing mongodb baseUrl, expected in eu.dnetlib.dhp.collection.ApiDescriptor.baseUrl"));
|
"missing mongodb baseUrl, expected in eu.dnetlib.dhp.collection.ApiDescriptor.baseUrl"));
|
||||||
|
log.info("mongoBaseUrl: {}", mongoBaseUrl);
|
||||||
|
|
||||||
final String dbName = Optional
|
final String dbName = Optional
|
||||||
.ofNullable(api.getParams().get(MONGODB_DBNAME))
|
.ofNullable(api.getParams().get(MONGODB_DBNAME))
|
||||||
.orElseThrow(() -> new CollectorException(String.format("missing parameter '%s'", MONGODB_DBNAME)));
|
.orElseThrow(() -> new CollectorException(String.format("missing parameter '%s'", MONGODB_DBNAME)));
|
||||||
|
log.info("dbName: {}", dbName);
|
||||||
|
|
||||||
final String mdId = Optional
|
final String mdId = Optional
|
||||||
.ofNullable(api.getParams().get(MDSTORE_ID))
|
.ofNullable(api.getParams().get(MDSTORE_ID))
|
||||||
.orElseThrow(() -> new CollectorException(String.format("missing parameter '%s'", MDSTORE_ID)));
|
.orElseThrow(() -> new CollectorException(String.format("missing parameter '%s'", MDSTORE_ID)));
|
||||||
|
log.info("mdId: {}", mdId);
|
||||||
|
|
||||||
final MdstoreClient client = new MdstoreClient(mongoBaseUrl, dbName);
|
final MdstoreClient client = new MdstoreClient(mongoBaseUrl, dbName);
|
||||||
final MongoCollection<Document> mdstore = client.mdStore(mdId);
|
final MongoCollection<Document> mdstore = client.mdStore(mdId);
|
||||||
|
|
|
@ -3,9 +3,11 @@ package eu.dnetlib.dhp.transformation.xslt;
|
||||||
|
|
||||||
import java.io.ByteArrayInputStream;
|
import java.io.ByteArrayInputStream;
|
||||||
import java.io.StringWriter;
|
import java.io.StringWriter;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
|
||||||
import javax.xml.transform.stream.StreamSource;
|
import javax.xml.transform.stream.StreamSource;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.spark.api.java.function.MapFunction;
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
|
|
||||||
import eu.dnetlib.dhp.aggregation.common.AggregationCounter;
|
import eu.dnetlib.dhp.aggregation.common.AggregationCounter;
|
||||||
|
@ -44,18 +46,20 @@ public class XSLTTransformationFunction implements MapFunction<MetadataRecord, M
|
||||||
Processor processor = new Processor(false);
|
Processor processor = new Processor(false);
|
||||||
processor.registerExtensionFunction(cleanFunction);
|
processor.registerExtensionFunction(cleanFunction);
|
||||||
processor.registerExtensionFunction(new DateCleaner());
|
processor.registerExtensionFunction(new DateCleaner());
|
||||||
|
|
||||||
final XsltCompiler comp = processor.newXsltCompiler();
|
final XsltCompiler comp = processor.newXsltCompiler();
|
||||||
XsltExecutable xslt = comp
|
XsltExecutable xslt = comp
|
||||||
.compile(new StreamSource(new ByteArrayInputStream(transformationRule.getBytes())));
|
.compile(new StreamSource(IOUtils.toInputStream(transformationRule, StandardCharsets.UTF_8)));
|
||||||
XdmNode source = processor
|
XdmNode source = processor
|
||||||
.newDocumentBuilder()
|
.newDocumentBuilder()
|
||||||
.build(new StreamSource(new ByteArrayInputStream(value.getBody().getBytes())));
|
.build(new StreamSource(IOUtils.toInputStream(value.getBody(), StandardCharsets.UTF_8)));
|
||||||
XsltTransformer trans = xslt.load();
|
XsltTransformer trans = xslt.load();
|
||||||
trans.setInitialContextNode(source);
|
trans.setInitialContextNode(source);
|
||||||
final StringWriter output = new StringWriter();
|
final StringWriter output = new StringWriter();
|
||||||
Serializer out = processor.newSerializer(output);
|
Serializer out = processor.newSerializer(output);
|
||||||
out.setOutputProperty(Serializer.Property.METHOD, "xml");
|
out.setOutputProperty(Serializer.Property.METHOD, "xml");
|
||||||
out.setOutputProperty(Serializer.Property.INDENT, "yes");
|
out.setOutputProperty(Serializer.Property.INDENT, "yes");
|
||||||
|
|
||||||
trans.setDestination(out);
|
trans.setDestination(out);
|
||||||
trans.transform();
|
trans.transform();
|
||||||
final String xml = output.toString();
|
final String xml = output.toString();
|
||||||
|
|
Loading…
Reference in New Issue