dnet-dedup/dnet-dedup-test/src/test/java/eu/dnetlib/pace/AbstractProtoPaceTest.java

209 lines
8.2 KiB
Java

package eu.dnetlib.pace;
import com.google.common.collect.Lists;
import com.google.gson.Gson;
import eu.dnetlib.data.proto.FieldTypeProtos.Author;
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier;
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty.Builder;
import eu.dnetlib.data.proto.OafProtos.Oaf;
import eu.dnetlib.data.proto.OafProtos.OafEntity;
import eu.dnetlib.data.proto.OrganizationProtos.Organization;
import eu.dnetlib.data.proto.ResultProtos.Result;
import eu.dnetlib.pace.config.Config;
import eu.dnetlib.pace.config.DedupConfig;
import eu.dnetlib.pace.config.Type;
import eu.dnetlib.pace.model.*;
import eu.dnetlib.pace.model.gt.GTAuthor;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.RandomStringUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang3.RandomUtils;
import java.io.IOException;
import java.io.StringWriter;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
/**
 * Base class for PACE tests that need {@link MapDocument} fixtures.
 *
 * <p>It offers factory methods that build documents either from OAF protobuf entities
 * (results, organizations) via {@link ProtoDocumentBuilder}, or directly from a map of
 * {@link Field}s (authors), plus helpers to load configurations and JSON fixtures from
 * the classpath.
 *
 * <p>The helpers {@code sf(...)} and {@code getQualifier(...)} used below are not defined
 * in this class; they are presumably inherited from {@link OafTest}.
 */
public abstract class AbstractProtoPaceTest extends OafTest {

    /** Pid types from which {@link #result(Config, String, String, String, List, List)} picks one at random per pid. */
    static List<String> pidTypes = Lists.newArrayList();

    static {
        pidTypes.add("doi");
        //pidTypes.add("oai");
        //pidTypes.add("pmid");
    }

    /**
     * Loads the "current" organization dedup configuration from the classpath.
     *
     * @return the configuration parsed from {@code organization.current.conf}
     */
    protected DedupConfig getOrganizationCurrentConf() {
        return DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/config/organization.current.conf"));
    }

    /**
     * Loads the "test" organization dedup configuration from the classpath.
     *
     * @return the configuration parsed from {@code organization.test.conf}
     */
    protected DedupConfig getOrganizationTestConf() {
        return DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/config/organization.test.conf"));
    }

    /**
     * Builds a document from the entity wrapped by the given OAF message.
     *
     * @param conf configuration providing the model used to extract the fields
     * @param id   identifier assigned to the resulting document
     * @param oaf  OAF message whose entity is converted
     * @return the document built by {@link ProtoDocumentBuilder}
     */
    protected MapDocument author(final Config conf, final String id, final Oaf oaf) {
        return ProtoDocumentBuilder.newInstance(id, oaf.getEntity(), conf.model());
    }

    /**
     * Reads a JSON classpath resource and deserializes it into a {@link GTAuthor}.
     *
     * @param path classpath location of the JSON resource
     * @return the deserialized author
     */
    protected GTAuthor getGTAuthor(final String path) {
        return new Gson().fromJson(readFromClasspath(path), GTAuthor.class);
    }

    /**
     * Reads a classpath resource, resolved relative to the runtime class of this test,
     * into a string.
     *
     * @param filename classpath location of the resource
     * @return the resource content
     * @throws RuntimeException if the resource does not exist or cannot be read
     */
    protected String readFromClasspath(final String filename) {
        // try-with-resources guarantees the stream is released; a null resource is simply not closed
        try (final java.io.InputStream in = getClass().getResourceAsStream(filename)) {
            if (in == null) {
                // getResourceAsStream signals "not found" with null: fail with a clear message instead of an NPE
                throw new RuntimeException("cannot load resource from classpath: " + filename);
            }
            final StringWriter sw = new StringWriter();
            // decode explicitly as UTF-8 rather than relying on the platform default charset
            IOUtils.copy(in, sw, "UTF-8");
            return sw.toString();
        } catch (final IOException e) {
            // keep the original exception as the cause so the failure can be diagnosed
            throw new RuntimeException("cannot load resource from classpath: " + filename, e);
        }
    }

    /**
     * Builds a result document with only a title (no date, no pids, no authors).
     *
     * @see #result(Config, String, String, String, List, List)
     */
    protected MapDocument result(final Config config, final String id, final String title) {
        return result(config, id, title, null, new ArrayList<>(), null);
    }

    /**
     * Builds a result document with a title and a date of acceptance (no pids, no authors).
     *
     * @see #result(Config, String, String, String, List, List)
     */
    protected MapDocument result(final Config config, final String id, final String title, final String date) {
        return result(config, id, title, date, new ArrayList<>(), null);
    }

    /**
     * Builds a result document with a title, a date of acceptance and a list of pids (no authors).
     *
     * @see #result(Config, String, String, String, List, List)
     */
    protected MapDocument result(final Config config, final String id, final String title, final String date, final List<String> pid) {
        return result(config, id, title, date, pid, null);
    }

    /**
     * Builds a result document with a title, a date of acceptance and a single pid (no authors).
     *
     * @see #result(Config, String, String, String, List, List)
     */
    protected MapDocument result(final Config config, final String id, final String title, final String date, final String pid) {
        return result(config, id, title, date, pid, null);
    }

    /**
     * Builds a result document with a title, a date of acceptance, a single pid and a list of authors.
     *
     * @see #result(Config, String, String, String, List, List)
     */
    protected MapDocument result(final Config config, final String id, final String title, final String date, final String pid, final List<String> authors) {
        return result(config, id, title, date, Lists.newArrayList(pid), authors);
    }

    /**
     * Builds an author document directly from field values, without going through protobuf.
     *
     * <p>Every scalar argument is stored under the field name matching its parameter
     * ({@code rank} as {@code Type.Int}, all others as {@code Type.String});
     * {@code coauthors} and {@code topics} become string list fields, with each topic
     * converted through {@code toString()}.
     *
     * @param identifier identifier of the resulting document
     * @param coauthors  coauthor names; {@code null} is treated as an empty list
     * @param topics     topic values; {@code null} is treated as an empty array
     * @return the document holding the fields described above
     */
    protected MapDocument author(final String identifier, final String area, final String firstname, final String lastname, final String fullname, final Double[] topics, final String pubID, final String pubDOI, final int rank, final String orcid, final List<String> coauthors) {
        final Map<String, Field> fieldMap = new HashMap<>();
        fieldMap.put("area", new FieldValueImpl(Type.String, "area", area));
        fieldMap.put("firstname", new FieldValueImpl(Type.String, "firstname", firstname));
        fieldMap.put("lastname", new FieldValueImpl(Type.String, "lastname", lastname));
        fieldMap.put("fullname", new FieldValueImpl(Type.String, "fullname", fullname));
        fieldMap.put("pubID", new FieldValueImpl(Type.String, "pubID", pubID));
        fieldMap.put("pubDOI", new FieldValueImpl(Type.String, "pubDOI", pubDOI));
        fieldMap.put("rank", new FieldValueImpl(Type.Int, "rank", rank));
        fieldMap.put("orcid", new FieldValueImpl(Type.String, "orcid", orcid));

        final FieldListImpl coauthorFields = new FieldListImpl("coauthors", Type.String);
        if (coauthors != null) {
            coauthorFields.addAll(coauthors.stream()
                    .map(s -> new FieldValueImpl(Type.String, "coauthors", s))
                    .collect(Collectors.toList()));
        }
        fieldMap.put("coauthors", coauthorFields);

        final FieldListImpl topicFields = new FieldListImpl("topics", Type.String);
        if (topics != null) {
            topicFields.addAll(Arrays.stream(topics)
                    .map(d -> new FieldValueImpl(Type.String, "topics", d.toString()))
                    .collect(Collectors.toList()));
        }
        fieldMap.put("topics", topicFields);

        return new MapDocument(identifier, fieldMap);
    }

    /**
     * Builds a result document from an OAF result entity.
     *
     * <p>When {@code title} is not blank it is added as the "main title", together with a
     * random 10-letter "alternative title". When {@code date} is not blank it is set as
     * the date of acceptance. Each non-blank pid is added with a type picked at random
     * from {@link #pidTypes}. Authors, when given, are ranked by their position in the list.
     *
     * @param config  configuration providing the model used to extract the fields
     * @param id      identifier of the entity and of the resulting document
     * @param title   main title; skipped when blank
     * @param date    date of acceptance; skipped when blank
     * @param pid     pid values; may be {@code null}, blank entries are skipped
     * @param authors author names; may be {@code null}
     * @return the document built by {@link ProtoDocumentBuilder}
     */
    protected MapDocument result(final Config config, final String id, final String title, final String date, final List<String> pid, final List<String> authors) {
        final Result.Metadata.Builder metadata = Result.Metadata.newBuilder();
        if (!StringUtils.isBlank(title)) {
            metadata.addTitle(getStruct(title, getQualifier("main title", "dnet:titles")));
            metadata.addTitle(getStruct(RandomStringUtils.randomAlphabetic(10), getQualifier("alternative title", "dnet:titles")));
        }
        if (!StringUtils.isBlank(date)) {
            metadata.setDateofacceptance(sf(date));
        }

        final OafEntity.Builder entity = oafEntity(id, eu.dnetlib.data.proto.TypeProtos.Type.result);
        final Result.Builder result = Result.newBuilder().setMetadata(metadata);

        if (authors != null) {
            result.getMetadataBuilder().addAllAuthor(
                    IntStream.range(0, authors.size())
                            .mapToObj(i -> author(authors.get(i), i))
                            .collect(Collectors.toCollection(LinkedList::new)));
        }
        entity.setResult(result);

        if (pid != null) {
            for (final String p : pid) {
                if (!StringUtils.isBlank(p)) {
                    // the upper bound of nextInt is exclusive: pass size() so that every pid type is eligible
                    entity.addPid(sp(p, pidTypes.get(RandomUtils.nextInt(0, pidTypes.size()))));
                    //entity.addPid(sp(RandomStringUtils.randomAlphabetic(10), "oai"));
                }
            }
        }

        return ProtoDocumentBuilder.newInstance(id, entity.build(), config.model());
    }

    /**
     * Builds a protobuf author from a raw name.
     *
     * <p>Name and surname are set only when the parsed {@code Person} reports itself as
     * accurate; the normalised full name and the rank are always set.
     *
     * @param s    raw author name
     * @param rank rank assigned to the author
     * @return the built author message
     */
    private Author author(final String s, int rank) {
        final eu.dnetlib.pace.model.Person p = new eu.dnetlib.pace.model.Person(s, false);
        final Author.Builder author = Author.newBuilder();
        if (p.isAccurate()) {
            author.setName(p.getNormalisedFirstName());
            author.setSurname(p.getNormalisedSurname());
        }
        author.setFullname(p.getNormalisedFullname());
        author.setRank(rank);
        return author.build();
    }

    /**
     * Creates an entity builder pre-populated with the given id and type.
     */
    private OafEntity.Builder oafEntity(final String id, final eu.dnetlib.data.proto.TypeProtos.Type type) {
        return OafEntity.newBuilder().setId(id).setType(type);
    }

    /**
     * Builds an organization document with a legal name only.
     *
     * @see #organization(Config, String, String, String)
     */
    protected MapDocument organization(final Config config, final String id, final String legalName) {
        return organization(config, id, legalName, null);
    }

    /**
     * Builds an organization document from an OAF organization entity.
     *
     * @param config         configuration providing the model used to extract the fields
     * @param id             identifier of the entity and of the resulting document
     * @param legalName      legal name; skipped when {@code null}
     * @param legalShortName legal short name; skipped when {@code null}
     * @return the document built by {@link ProtoDocumentBuilder}
     */
    protected MapDocument organization(final Config config, final String id, final String legalName, final String legalShortName) {
        final Organization.Metadata.Builder metadata = Organization.Metadata.newBuilder();
        if (legalName != null) {
            metadata.setLegalname(sf(legalName));
        }
        if (legalShortName != null) {
            metadata.setLegalshortname(sf(legalShortName));
        }

        // tag the entity as an organization, consistently with the payload set below
        // (it was previously tagged as a result)
        final OafEntity.Builder entity = oafEntity(id, eu.dnetlib.data.proto.TypeProtos.Type.organization);
        entity.setOrganization(Organization.newBuilder().setMetadata(metadata));

        return ProtoDocumentBuilder.newInstance(id, entity.build(), config.model());
    }

    /**
     * Builds a pid as a structured property whose qualifier uses {@code type} as both
     * class id and class name, within the {@code dnet:pid_types} scheme.
     */
    private StructuredProperty sp(final String pid, final String type) {
        final Qualifier.Builder qualifier = Qualifier.newBuilder()
                .setClassid(type)
                .setClassname(type)
                .setSchemeid("dnet:pid_types")
                .setSchemename("dnet:pid_types");
        return StructuredProperty.newBuilder().setValue(pid).setQualifier(qualifier).build();
    }

    /**
     * Wraps the given string into a string field named {@code title}.
     */
    protected Field title(final String s) {
        return new FieldValueImpl(Type.String, "title", s);
    }

    /**
     * Creates a structured property builder carrying the given value and qualifier.
     */
    protected static Builder getStruct(final String value, final Qualifier.Builder qualifier) {
        return StructuredProperty.newBuilder().setValue(value).setQualifier(qualifier);
    }

    // NOTE: local definitions of sf(String) and getQualifier(String, String) used to live here
    // as commented-out code; the calls above presumably resolve to the versions in OafTest.
}