forked from D-Net/dnet-hadoop
When migrating action sets from the DM cluster, populate AtomicAction.targetValue when it is empty (dedup similarities).
This commit is contained in:
parent
9c84e21b87
commit
af835f2f98
|
@ -8,7 +8,10 @@ import com.google.protobuf.InvalidProtocolBufferException;
|
||||||
import eu.dnetlib.actionmanager.actions.AtomicAction;
|
import eu.dnetlib.actionmanager.actions.AtomicAction;
|
||||||
import eu.dnetlib.data.proto.OafProtos;
|
import eu.dnetlib.data.proto.OafProtos;
|
||||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.DataInfo;
|
||||||
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
import eu.dnetlib.dhp.schema.oaf.Oaf;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||||
|
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||||
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
|
||||||
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
|
||||||
|
@ -82,19 +85,55 @@ public class TransformActions implements Serializable {
|
||||||
|
|
||||||
sc.sequenceFile(sourcePath, Text.class, Text.class)
|
sc.sequenceFile(sourcePath, Text.class, Text.class)
|
||||||
.mapToPair(a -> new Tuple2<>(a._1(), AtomicAction.fromJSON(a._2().toString())))
|
.mapToPair(a -> new Tuple2<>(a._1(), AtomicAction.fromJSON(a._2().toString())))
|
||||||
.mapToPair(a -> new Tuple2<>(a._1(), transformAction(a._2())))
|
.mapToPair(a -> new Tuple2<>(a._1(), transformAction(a._1().toString(), a._2())))
|
||||||
|
|
||||||
.saveAsHadoopFile(targetDirectory.toString(), Text.class, Text.class, SequenceFileOutputFormat.class, GzipCodec.class);
|
.saveAsHadoopFile(targetDirectory.toString(), Text.class, Text.class, SequenceFileOutputFormat.class, GzipCodec.class);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private Text transformAction(AtomicAction aa) throws InvalidProtocolBufferException, JsonProcessingException {
|
private Text transformAction(String atomicaActionId, AtomicAction aa) throws InvalidProtocolBufferException, JsonProcessingException {
|
||||||
|
|
||||||
final ObjectMapper mapper = new ObjectMapper();
|
final ObjectMapper mapper = new ObjectMapper();
|
||||||
if (aa.getTargetValue() != null && aa.getTargetValue().length > 0) {
|
if (aa.getTargetValue() != null && aa.getTargetValue().length > 0) {
|
||||||
Oaf oaf = ProtoConverter.convert(OafProtos.Oaf.parseFrom(aa.getTargetValue()));
|
Oaf oaf = ProtoConverter.convert(OafProtos.Oaf.parseFrom(aa.getTargetValue()));
|
||||||
aa.setTargetValue(mapper.writeValueAsString(oaf).getBytes());
|
aa.setTargetValue(mapper.writeValueAsString(oaf).getBytes());
|
||||||
|
} else {
|
||||||
|
|
||||||
|
if (atomicaActionId.contains("dedupSimilarity")) {
|
||||||
|
|
||||||
|
final String[] splitId = atomicaActionId.split("@");
|
||||||
|
|
||||||
|
String source = splitId[0];
|
||||||
|
String target = splitId[2];
|
||||||
|
|
||||||
|
String[] relSemantic = splitId[1].split("_");
|
||||||
|
|
||||||
|
Relation rel = new Relation();
|
||||||
|
rel.setSource(source);
|
||||||
|
rel.setTarget(target);
|
||||||
|
rel.setRelType(relSemantic[0]);
|
||||||
|
rel.setSubRelType(relSemantic[1]);
|
||||||
|
rel.setRelClass(relSemantic[2]);
|
||||||
|
|
||||||
|
DataInfo d = new DataInfo();
|
||||||
|
d.setDeletedbyinference(false);
|
||||||
|
d.setInferenceprovenance("deduplication");
|
||||||
|
d.setInferred(true);
|
||||||
|
d.setInvisible(false);
|
||||||
|
Qualifier provenanceaction = new Qualifier();
|
||||||
|
|
||||||
|
provenanceaction.setClassid("deduplication");
|
||||||
|
provenanceaction.setClassname("deduplication");
|
||||||
|
provenanceaction.setSchemeid("dnet:provenanceActions");
|
||||||
|
provenanceaction.setSchemename("dnet:provenanceActions");
|
||||||
|
|
||||||
|
d.setProvenanceaction(provenanceaction);
|
||||||
|
|
||||||
|
rel.setDataInfo(d);
|
||||||
|
|
||||||
|
aa.setTargetValue(mapper.writeValueAsString(rel).getBytes());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return new Text(mapper.writeValueAsString(aa));
|
return new Text(mapper.writeValueAsString(aa));
|
||||||
|
|
Loading…
Reference in New Issue