1
0
Fork 0

code formatting

This commit is contained in:
Claudio Atzori 2023-08-29 11:11:00 +02:00
parent 0515d81c7c
commit bf35280ea6
5 changed files with 173 additions and 163 deletions

View File

@ -292,7 +292,8 @@ public class GraphCleaningFunctions extends CleaningFunctions {
} else if (value instanceof Result) {
Result r = (Result) value;
if (Objects.nonNull(r.getFulltext()) && (ModelConstants.SOFTWARE_RESULTTYPE_CLASSID.equals(r.getResulttype().getClassid()) ||
if (Objects.nonNull(r.getFulltext())
&& (ModelConstants.SOFTWARE_RESULTTYPE_CLASSID.equals(r.getResulttype().getClassid()) ||
ModelConstants.DATASET_RESULTTYPE_CLASSID.equals(r.getResulttype().getClassid()))) {
r.setFulltext(null);
@ -326,10 +327,13 @@ public class GraphCleaningFunctions extends CleaningFunctions {
if (StringUtils.isBlank(r.getPublisher().getValue())) {
r.setPublisher(null);
} else {
r.getPublisher().setValue(
r.getPublisher().getValue()
.replaceAll(NAME_CLEANING_REGEX, " ")
);
r
.getPublisher()
.setValue(
r
.getPublisher()
.getValue()
.replaceAll(NAME_CLEANING_REGEX, " "));
}
}
if (Objects.isNull(r.getLanguage()) || StringUtils.isBlank(r.getLanguage().getClassid())) {
@ -623,25 +627,28 @@ public class GraphCleaningFunctions extends CleaningFunctions {
private static Author cleanupAuthor(Author author) {
if (StringUtils.isNotBlank(author.getFullname())) {
author.setFullname(
author.getFullname()
author
.setFullname(
author
.getFullname()
.replaceAll(NAME_CLEANING_REGEX, " ")
.replace("\"", "\\\"")
);
.replace("\"", "\\\""));
}
if (StringUtils.isNotBlank(author.getName())) {
author.setName(
author.getName()
author
.setName(
author
.getName()
.replaceAll(NAME_CLEANING_REGEX, " ")
.replace("\"", "\\\"")
);
.replace("\"", "\\\""));
}
if (StringUtils.isNotBlank(author.getSurname())) {
author.setSurname(
author.getSurname()
author
.setSurname(
author
.getSurname()
.replaceAll(NAME_CLEANING_REGEX, " ")
.replace("\"", "\\\"")
);
.replace("\"", "\\\""));
}
return author;

View File

@ -18,7 +18,6 @@ package eu.dnetlib.pace.util;
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.net.URLEncoder;

View File

@ -1,13 +1,13 @@
package eu.dnetlib.dhp.oa.dedup;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import static org.apache.spark.sql.functions.col;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.Objects;
import org.apache.commons.beanutils.BeanUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
@ -18,16 +18,17 @@ import org.apache.spark.api.java.function.ReduceFunction;
import org.apache.spark.sql.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.common.ModelConstants;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.DataInfo;
import eu.dnetlib.dhp.schema.oaf.Relation;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
import scala.Tuple2;
import scala.Tuple3;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.Objects;
import static org.apache.spark.sql.functions.col;
public class SparkPropagateRelation extends AbstractSparkAction {
private static final Logger log = LoggerFactory.getLogger(SparkPropagateRelation.class);
@ -84,7 +85,8 @@ public class SparkPropagateRelation extends AbstractSparkAction {
.distinct()
.cache();
Dataset<Row> allRels = spark.read()
Dataset<Row> allRels = spark
.read()
.schema(REL_BEAN_ENC.schema())
.json(DedupUtility.createEntityPath(graphBasePath, "relation"));
@ -95,7 +97,8 @@ public class SparkPropagateRelation extends AbstractSparkAction {
.as(Encoders.tuple(REL_BEAN_ENC, Encoders.STRING(), Encoders.STRING()))
.flatMap(SparkPropagateRelation::addInferredRelations, REL_KRYO_ENC);
Dataset<Relation> processedRelations = distinctRelations(dedupedRels.union(mergeRels.map((MapFunction<Relation, Relation>) r -> r, REL_KRYO_ENC)))
Dataset<Relation> processedRelations = distinctRelations(
dedupedRels.union(mergeRels.map((MapFunction<Relation, Relation>) r -> r, REL_KRYO_ENC)))
.filter((FilterFunction<Relation>) r -> !Objects.equals(r.getSource(), r.getTarget()));
save(processedRelations, outputRelationPath, SaveMode.Overwrite);
@ -141,8 +144,7 @@ public class SparkPropagateRelation extends AbstractSparkAction {
.reduceGroups((ReduceFunction<Relation>) (b, a) -> {
b.mergeFrom(a);
return b;
}
)
})
.map((MapFunction<Tuple2<String, Relation>, Relation>) Tuple2::_2, REL_BEAN_ENC);
}

View File

@ -1,14 +1,14 @@
package eu.dnetlib.dhp.oa.graph.group;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob;
import eu.dnetlib.dhp.oa.merge.GroupEntitiesSparkJob;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.utils.DHPUtils;
import static org.junit.jupiter.api.Assertions.assertEquals;
import java.io.IOException;
import java.net.URISyntaxException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.spark.SparkConf;
@ -19,13 +19,15 @@ import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.*;
import java.io.IOException;
import java.net.URISyntaxException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import static org.junit.jupiter.api.Assertions.assertEquals;
import eu.dnetlib.dhp.common.HdfsSupport;
import eu.dnetlib.dhp.oa.merge.DispatchEntitiesSparkJob;
import eu.dnetlib.dhp.oa.merge.GroupEntitiesSparkJob;
import eu.dnetlib.dhp.schema.common.ModelSupport;
import eu.dnetlib.dhp.schema.oaf.Result;
import eu.dnetlib.dhp.utils.DHPUtils;
@TestMethodOrder(MethodOrderer.OrderAnnotation.class)
public class GroupEntitiesSparkJobTest {