BrBETA_dnet-hadoop/dhp-workflows/dhp-doiboost/src/test/java/eu/dnetlib/doiboost/orcidnodoi/xml/OrcidNoDoiTest.java


package eu.dnetlib.doiboost.orcidnodoi.xml;

import com.ximpleware.NavException;
import com.ximpleware.ParseException;
import com.ximpleware.XPathEvalException;
import com.ximpleware.XPathParseException;
import eu.dnetlib.dhp.parser.utility.VtdException;
import eu.dnetlib.doiboost.orcid.model.AuthorData;
import eu.dnetlib.doiboost.orcidnodoi.model.Contributor;
import eu.dnetlib.doiboost.orcidnodoi.model.WorkDataNoDoi;
import jdk.nashorn.internal.ir.annotations.Ignore;
import org.apache.commons.io.IOUtils;
import org.apache.commons.text.similarity.JaccardSimilarity;
import org.apache.commons.text.similarity.JaroWinklerSimilarity;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.InputStream;
import java.text.Normalizer;
import java.util.*;

import static org.junit.jupiter.api.Assertions.assertNotNull;

public class OrcidNoDoiTest {

	// Logger shared by every test and helper in this class.
	private static final Logger logger = LoggerFactory.getLogger(OrcidNoDoiTest.class);

	// Fixture values. The "A" values describe the reference author that authorMatchTest
	// loads into an AuthorData; the "B" values are an abbreviated / hyphenated spelling
	// compared against "A" in similarityTest and bestMatchTest.
	String nameA = "Khairy";
	String surnameA = "Abdel Dayem";
	// Not read by any method visible in this class.
	String otherNameA = "Dayem MKA";
	String nameB = "K";
	String surnameB = "Abdel-Dayem";
	// Used as the author's oid in authorMatchTest.
	String orcidIdA = "0000-0003-2760-1191";
	// Minimum bestMatch score a contributor must reach in authorMatchTest to stay a candidate.
	Double threshold = 0.8;

	/**
	 * Logs the Jaro-Winkler and Jaccard similarity between the "A" and "B" fixture names.
	 * Makes no assertions; it exists to eyeball the two scores.
	 *
	 * The JUnit 5 {@code @Disabled} annotation replaces the Nashorn-internal {@code @Ignore},
	 * which JUnit does not recognise, and the method is no longer {@code private} because
	 * JUnit Jupiter does not execute private test methods.
	 */
	@Test
	@Disabled("manual check: only logs similarity scores")
	public void similarityTest() throws Exception {
		logger.info("running testSimilarity ....");
		logger.info("JaroWinklerSimilarity: {}", similarityJaroWinkler(nameA, surnameA, nameB, surnameB));
		logger.info("JaccardSimilarity: {}", similarityJaccard(nameA, surnameA, nameB, surnameB));
	}

	/**
	 * Logs the {@code bestMatch} score of the "A" author against a contributor string
	 * built as {@code "<surnameB>, <nameB>"}. Makes no assertions.
	 *
	 * The JUnit 5 {@code @Disabled} annotation replaces the Nashorn-internal {@code @Ignore},
	 * which JUnit does not recognise, and the method is no longer {@code private} because
	 * JUnit Jupiter does not execute private test methods.
	 */
	@Test
	@Disabled("manual check: only logs the best-match score")
	public void bestMatchTest() throws Exception {
		logger.info("running bestMatchTest ....");
		final String contributor = surnameB + ", " + nameB;
		logger.info("score: {}", bestMatch(surnameA, nameA, contributor));
	}

	/**
	 * Scores how well a free-text contributor string matches an author.
	 *
	 * The last blank-separated token of {@code contributor} is taken as the given name and
	 * everything before it as the surname. Because the real order is unknown, the author is
	 * compared against both (name, surname) and (surname, name) and the higher score wins.
	 *
	 * @param authorSurname surname of the reference author
	 * @param authorName given name of the reference author
	 * @param contributor contributor name as free text; may be {@code null} or blank
	 * @return the higher of the two similarity scores, or {@code 0.0} when
	 *         {@code contributor} is {@code null} or blank
	 */
	private static Double bestMatch(String authorSurname, String authorName, String contributor) {
		logger.debug("{} {} vs {}", authorSurname, authorName, contributor);
		// a missing or blank contributor cannot match anything
		if (contributor == null || contributor.trim().isEmpty()) {
			return 0.0;
		}
		final String credit = contributor.trim();
		final int lastBlank = credit.lastIndexOf(' ');
		final String contributorName = credit.substring(lastBlank + 1);
		final String contributorSurname = lastBlank < 0 ? "" : credit.substring(0, lastBlank);
		logger.debug("contributorName: {} contributorSurname: {}", contributorName, contributorSurname);

		final String authorNameNrm = normalize(authorName);
		final String authorSurnameNrm = normalize(authorSurname);
		final String contributorNameNrm = normalize(contributorName);
		final String contributorSurnameNrm = normalize(contributorSurname);
		// try the contributor tokens in both orders and keep the better score
		final Double sm1 = similarity(authorNameNrm, authorSurnameNrm, contributorNameNrm, contributorSurnameNrm);
		final Double sm2 = similarity(authorNameNrm, authorSurnameNrm, contributorSurnameNrm, contributorNameNrm);
		return Math.max(sm1, sm2);
	}

	/**
	 * Computes the Jaro-Winkler score for two (name, surname) pairs and writes the
	 * compared values together with the score to the debug log.
	 *
	 * @return the score produced by {@code similarityJaroWinkler}
	 */
	private static Double similarity(String nameA, String surnameA, String nameB, String surnameB) {
		final Double jaroWinkler = similarityJaroWinkler(nameA, surnameA, nameB, surnameB);
		final String message = nameA + ", " + surnameA + " <> " + nameB + ", " + surnameB + "   score: "
			+ Double.toString(jaroWinkler);
		logger.debug(message);
		return jaroWinkler;
	}

	/**
	 * Jaccard similarity between the normalized "surname name" strings of two persons.
	 */
	private static Double similarityJaccard(String nameA, String surnameA, String nameB, String surnameB) {
		final JaccardSimilarity jaccard = new JaccardSimilarity();
		final String left = normalize(parse(nameA, surnameA));
		final String right = normalize(parse(nameB, surnameB));
		return jaccard.apply(left, right);
	}

	/**
	 * Jaro-Winkler similarity between the normalized "surname name" strings of two persons.
	 */
	private static Double similarityJaroWinkler(String nameA, String surnameA, String nameB, String surnameB) {
		final JaroWinklerSimilarity jaroWinkler = new JaroWinklerSimilarity();
		final String left = normalize(parse(nameA, surnameA));
		final String right = normalize(parse(nameB, surnameB));
		return jaroWinkler.apply(left, right);
	}

	/**
	 * Builds the "surname name" form of a person's name, separated by a single blank.
	 */
	private static String parse(String name, String surname) {
		// String.join renders a null element as "null", exactly like string concatenation
		return String.join(" ", surname, name);
	}

	/**
	 * Normalizes a name for comparison: decomposes accented characters, lower-cases,
	 * drops the diacritical marks and replaces punctuation, digits and any other
	 * non-word characters with a blank.
	 *
	 * The combining marks are removed (not replaced by a blank) and this happens before
	 * the {@code \W} pass. Otherwise {@code \W} matches every mark first and turns
	 * "Jérôme" into "je ro me" instead of "jerome".
	 *
	 * @param s text to normalize; may be {@code null}
	 * @return the normalized text, or an empty string when {@code s} is {@code null}
	 */
	private static String normalize(final String s) {
		if (s == null) {
			return "";
		}
		return nfd(s)
			// locale-independent casing (e.g. avoids the Turkish dotless i)
			.toLowerCase(Locale.ROOT)
			// do not compact the regexes in a single expression, would cause StackOverflowError
			// in case of large input strings
			.replaceAll("(\\p{InCombiningDiacriticalMarks})+", "")
			.replaceAll("(\\W)+", " ")
			.replaceAll("(\\p{Punct})+", " ")
			.replaceAll("(\\d)+", " ")
			// \n is already matched by \W above; kept for parity with the original pipeline
			.replaceAll("(\\n)+", " ")
			.trim();
	}

	/**
	 * Returns the canonical decomposition (Unicode NFD) of the given string, so that an
	 * accented letter becomes its base letter followed by combining marks.
	 */
	private static String nfd(final String s) {
		return Normalizer.normalize(s, Normalizer.Form.NFD);
	}

	/**
	 * Parses the sample work record {@code activity_work_0000-0003-2760-1191.xml} and checks
	 * that the parsed object, its oid and its titles are present; the remaining fields are
	 * only logged.
	 *
	 * The JUnit 5 {@code @Disabled} annotation replaces the Nashorn-internal {@code @Ignore},
	 * which JUnit does not recognise. A missing resource or a parser error now fails the
	 * test with its cause instead of surfacing later as a bare null assertion.
	 */
	@Test
	@Disabled("marked as ignored by the original author")
	public void readPublicationFieldsTest()
		throws IOException, XPathEvalException, XPathParseException, NavException, VtdException, ParseException {
		logger.info("running loadPublicationFieldsTest ....");
		final String resource = "activity_work_0000-0003-2760-1191.xml";
		WorkDataNoDoi workData;
		// hand the raw bytes to the parser: no platform-charset String round trip, stream always closed
		try (InputStream in = OrcidNoDoiTest.class.getResourceAsStream(resource)) {
			assertNotNull(in, "test resource not found: " + resource);
			workData = new XMLRecordParserNoDoi().VTDParseWorkData(IOUtils.toByteArray(in));
		} catch (Exception e) {
			throw new AssertionError("parsing xml failed: " + resource, e);
		}
		assertNotNull(workData);
		assertNotNull(workData.getOid());
		logger.info("oid: {}", workData.getOid());
		assertNotNull(workData.getTitles());
		logger.info("titles: ");
		workData.getTitles().forEach(t -> logger.info(t));
		logger.info("source: {}", workData.getSourceName());
		logger.info("type: {}", workData.getType());
		logger.info("urls: ");
		workData.getUrls().forEach(u -> logger.info(u));
		logger.info("publication date: ");
		workData
			.getPublicationDates()
			.forEach(d -> logger.info(d.getYear() + " - " + d.getMonth() + " - " + d.getDay()));
		logger.info("external id: ");
		// keep only the identifiers whose relationship is "self" or not set
		workData.getExtIds().removeIf(e -> e.getRelationShip() != null && !e.getRelationShip().equals("self"));
		workData
			.getExtIds()
			.forEach(e -> logger.info(e.getType() + " - " + e.getValue() + " - " + e.getRelationShip()));
		logger.info("contributors: ");
		workData
			.getContributors()
			.forEach(c -> logger.info(c.getName() + " - " + c.getRole() + " - " + c.getSequence()));
	}

	/**
	 * Assigns a 1-based running number as sequence to every contributor, unless the list
	 * already carries sequence data.
	 *
	 * Sequence data counts as present when at least one contributor has role "author" and
	 * sequence "first" or "additional"; in that case the list is left untouched.
	 *
	 * @param contributors contributors to rank; modified in place
	 */
	private void updateRanks(List<Contributor> contributors) {
		// anyMatch stops at the first hit instead of counting every match
		final boolean seqFound = contributors
			.stream()
			.anyMatch(
				c -> "author".equals(c.getRole())
					&& ("first".equals(c.getSequence()) || "additional".equals(c.getSequence())));
		if (seqFound) {
			logger.info("sequence data found");
			return;
		}
		int rank = 0;
		for (Contributor c : contributors) {
			rank++;
			c.setSequence(Integer.toString(rank));
		}
	}

	/**
	 * Copies the author's name, surname and oid onto every contributor flagged as a simple
	 * match, then refreshes the contributor ranks.
	 */
	private void updateAuthorsSimpleMatch(List<Contributor> contributors, AuthorData author) {
		for (final Contributor candidate : contributors) {
			if (!candidate.isSimpleMatch()) {
				continue;
			}
			logger.info("simple match on : " + candidate.getCreditName());
			candidate.setName(author.getName());
			candidate.setSurname(author.getSurname());
			candidate.setOid(author.getOid());
		}
		updateRanks(contributors);
	}

	/**
	 * Logs the state of every contributor, copies the author's name, surname and oid onto
	 * each contributor flagged as best match, then refreshes the contributor ranks.
	 */
	private void updateAuthorsSimilarityMatch(List<Contributor> contributors, AuthorData author) {
		logger.info("inside updateAuthorsSimilarityMatch ...");
		// first pass: dump the current state of every contributor
		for (final Contributor each : contributors) {
			final String state = each.getOid() + " - " + each.getCreditName() + " - " +
				each.getName() + " - " + each.getSurname() + " - " +
				each.getRole() + " - " + each.getSequence() + " - best: " + each.isBestMatch() + " - simpe: "
				+ each.isSimpleMatch();
			logger.info(state);
		}

		// second pass: overwrite the identity of the best-matching contributors
		for (final Contributor each : contributors) {
			if (each.isBestMatch()) {
				logger.info("similarity match on : " + each.getCreditName());
				each.setName(author.getName());
				each.setSurname(author.getSurname());
				each.setOid(author.getOid());
			}
		}
		updateRanks(contributors);
	}

	/**
	 * Runs the author matching flow against the "-similarity" sample work record.
	 *
	 * The JUnit 5 {@code @Disabled} annotation replaces the Nashorn-internal {@code @Ignore},
	 * which JUnit does not recognise.
	 */
	@Test
	@Disabled("marked as ignored by the original author")
	public void authorSimilarityMatchTest() throws Exception {
		logger.info("running authorSimilarityMatchTest ....");
		authorMatchTest("activity_work_0000-0003-2760-1191-similarity.xml");
	}

	/**
	 * Runs the author matching flow against the plain sample work record.
	 *
	 * Declared {@code public}: JUnit Jupiter does not execute {@code private} test methods,
	 * so the previous declaration was never run.
	 */
	@Test
	public void authorSimpleMatchTest() throws Exception {
		logger.info("running authorSimpleMatchTest ....");
		authorMatchTest("activity_work_0000-0003-2760-1191.xml");
	}

	/**
	 * Loads a work record from the test classpath and tries to identify which of its
	 * contributors is the fixture author.
	 *
	 * A contributor is a "simple match" when its normalized credit name contains the
	 * author's normalized name, surname or other name. With exactly one simple match the
	 * author data is copied onto it. With several, each is scored with {@code bestMatch}
	 * and the highest score at or above {@code threshold} is flagged as best match and
	 * updated. The resulting contributors are logged.
	 *
	 * NOTE(review): the author's other name is never set in this method, so the
	 * other-name branch is presumably not exercised here.
	 *
	 * @param orcidWork name of the XML resource, resolved relative to this class
	 */
	private void authorMatchTest(String orcidWork)
		throws IOException, XPathEvalException, XPathParseException, NavException, VtdException, ParseException {
		final AuthorData author = new AuthorData();
		author.setName(nameA);
		author.setSurname(surnameA);
		author.setOid(orcidIdA);

		WorkDataNoDoi workData;
		// hand the raw bytes to the parser: no platform-charset String round trip, stream always closed
		try (InputStream in = OrcidNoDoiTest.class.getResourceAsStream(orcidWork)) {
			assertNotNull(in, "test resource not found: " + orcidWork);
			workData = new XMLRecordParserNoDoi().VTDParseWorkData(IOUtils.toByteArray(in));
		} catch (Exception e) {
			throw new AssertionError("parsing xml failed: " + orcidWork, e);
		}
		assertNotNull(workData);
		final List<Contributor> contributors = workData.getContributors();

		final String authorName = normalize(author.getName());
		final String authorSurname = normalize(author.getSurname());
		final String authorOtherName = author.getOtherName() == null ? null : normalize(author.getOtherName());
		int matchCounter = 0;
		for (Contributor c : contributors) {
			if (c.getCreditName() == null) {
				// nothing to compare against
				continue;
			}
			final String creditName = normalize(c.getCreditName());
			if (creditName.contains(authorName)
				|| creditName.contains(authorSurname)
				|| (authorOtherName != null && creditName.contains(authorOtherName))) {
				matchCounter++;
				c.setSimpleMatch(true);
			}
		}
		logger.info("match counter: {}", matchCounter);

		if (matchCounter == 1) {
			updateAuthorsSimpleMatch(contributors, author);
		} else if (matchCounter > 1) {
			Contributor bestMatchContributor = null;
			double bestScore = 0;
			for (Contributor c : contributors) {
				if (!c.isSimpleMatch()) {
					continue;
				}
				// bestMatch expects (surname, name, contributor)
				final Double score = bestMatch(author.getSurname(), author.getName(), c.getCreditName());
				c.setScore(score);
				logger.debug("nella map: " + c.getCreditName() + " score: " + c.getScore());
				// the first contributor wins on ties
				if (score >= threshold && (bestMatchContributor == null || score > bestScore)) {
					bestMatchContributor = c;
					bestScore = score;
				}
			}
			if (bestMatchContributor != null) {
				bestMatchContributor.setBestMatch(true);
				logger.info("best match: " + bestMatchContributor.getCreditName());
				updateAuthorsSimilarityMatch(contributors, author);
			}
		}

		logger.info("UPDATED contributors: ");
		for (Contributor c : contributors) {
			logger
				.info(
					c.getOid() + " - " + c.getCreditName() + " - " +
						c.getName() + " - " + c.getSurname() + " - " +
						c.getRole() + " - " + c.getSequence());
		}
	}
}

//
//		orcid_RDD = sc.textFile(ORCID_DUMP_PATH)
//		no_doi_works_RDD = orcid_RDD.map(orcid_map).filter(lambda x:x is not None).map(lambda x: json.dumps(x)).saveAsTextFile(path=ORCID_OPENAIRE_PATH,compressionCodecClass="org.apache.hadoop.io.compress.GzipCodec")
//
parser of orcid publication data from xml original dump 2020-06-24 16:29:32 +02:00
			`package eu.dnetlib.doiboost.orcidnodoi.xml;`

			`import com.ximpleware.NavException;`
			`import com.ximpleware.ParseException;`
			`import com.ximpleware.XPathEvalException;`
			`import com.ximpleware.XPathParseException;`
			`import eu.dnetlib.dhp.parser.utility.VtdException;`
			`import eu.dnetlib.doiboost.orcid.model.AuthorData;`
			`import eu.dnetlib.doiboost.orcidnodoi.model.Contributor;`
			`import eu.dnetlib.doiboost.orcidnodoi.model.WorkDataNoDoi;`
			`import jdk.nashorn.internal.ir.annotations.Ignore;`
			`import org.apache.commons.io.IOUtils;`
			`import org.apache.commons.text.similarity.JaccardSimilarity;`
			`import org.apache.commons.text.similarity.JaroWinklerSimilarity;`
			`import org.junit.jupiter.api.Test;`
			`import org.slf4j.Logger;`
			`import org.slf4j.LoggerFactory;`

			`import java.io.IOException;`
			`import java.text.Normalizer;`
			`import java.util.*;`

			`import static org.junit.jupiter.api.Assertions.assertNotNull;`

			`public class OrcidNoDoiTest {`

			`private static final Logger logger = LoggerFactory.getLogger(OrcidNoDoiTest.class);`

			`String nameA = "Khairy";`
			`String surnameA = "Abdel Dayem";`
			`String otherNameA = "Dayem MKA";`
			`String nameB = "K";`
			`String surnameB = "Abdel-Dayem";`
			`String orcidIdA = "0000-0003-2760-1191";`
			`Double threshold = 0.8;`

			`@Test`
			`@Ignore`
			`private void similarityTest() throws Exception {`
			`logger.info("running testSimilarity ....");`
			`logger`
			`.info(`
			`"JaroWinklerSimilarity: "`
			`+ Double.toString(similarityJaroWinkler(nameA, surnameA, nameB, surnameB)));`
			`logger`
			`.info(`
			`"JaccardSimilarity: " + Double.toString(similarityJaccard(nameA, surnameA, nameB, surnameB)));`
			`}`

			`@Test`
			`@Ignore`
			`private void bestMatchTest() throws Exception {`
			`logger.info("running bestMatchTest ....");`
			`String contributor = surnameB + ", " + nameB;`
			`logger.info("score: " + Double.toString(bestMatch(surnameA, nameA, contributor)));`
			`}`

			`private static Double bestMatch(String authorSurname, String authorName, String contributor) {`
			`logger.debug(authorSurname + " " + authorName + " vs " + contributor);`
			`String[] contributorSplitted = contributor.split(" ");`
			`if (contributorSplitted.length == 0) {`
			`return 0.0;`
			`}`
			`final String contributorName = contributorSplitted[contributorSplitted.length - 1];`
			`String contributorSurname = "";`
			`if (contributorSplitted.length > 1) {`
			`StringJoiner joiner = new StringJoiner(" ");`
			`for (int i = 0; i < contributorSplitted.length - 1; i++) {`
			`joiner.add(contributorSplitted[i]);`
			`}`
			`contributorSurname = joiner.toString();`
			`}`
			`logger`
			`.debug(`
			`"contributorName: " + contributorName +`
			`" contributorSurname: " + contributorSurname);`
			`String authorNameNrm = normalize(authorName);`
			`String authorSurnameNrm = normalize(authorSurname);`
			`String contributorNameNrm = normalize(contributorName);`
			`String contributorSurnameNrm = normalize(contributorSurname);`
			`Double sm1 = similarity(authorNameNrm, authorSurnameNrm, contributorNameNrm, contributorSurnameNrm);`
			`Double sm2 = similarity(authorNameNrm, authorSurnameNrm, contributorSurnameNrm, contributorNameNrm);`
			`if (sm1.compareTo(sm2) >= 0) {`
			`return sm1;`
			`}`
			`return sm2;`
			`}`

			`private static Double similarity(String nameA, String surnameA, String nameB, String surnameB) {`
			`Double score = similarityJaroWinkler(nameA, surnameA, nameB, surnameB);`
			`logger`
			`.debug(nameA + ", " + surnameA + " <> " + nameB + ", " + surnameB + " score: " + Double.toString(score));`
			`return score;`
			`}`

			`private static Double similarityJaccard(String nameA, String surnameA, String nameB, String surnameB) {`
			`return new JaccardSimilarity().apply(normalize(parse(nameA, surnameA)), normalize(parse(nameB, surnameB)));`
			`}`

			`private static Double similarityJaroWinkler(String nameA, String surnameA, String nameB, String surnameB) {`
			`return new JaroWinklerSimilarity().apply(normalize(parse(nameA, surnameA)), normalize(parse(nameB, surnameB)));`
			`}`

			`private static String parse(String name, String surname) {`
			`return surname + " " + name;`
			`}`

			`private static String normalize(final String s) {`
			`return nfd(s)`
			`.toLowerCase()`
			`// do not compact the regexes in a single expression, would cause StackOverflowError`
			`// in case`
			`// of large input strings`
			`.replaceAll("(\\W)+", " ")`
			`.replaceAll("(\\p{InCombiningDiacriticalMarks})+", " ")`
			`.replaceAll("(\\p{Punct})+", " ")`
			`.replaceAll("(\\d)+", " ")`
			`.replaceAll("(\\n)+", " ")`
			`.trim();`
			`}`

			`private static String nfd(final String s) {`
			`return Normalizer.normalize(s, Normalizer.Form.NFD);`
			`}`

			`@Test`
			`@Ignore`
			`public void readPublicationFieldsTest()`
			`throws IOException, XPathEvalException, XPathParseException, NavException, VtdException, ParseException {`
			`logger.info("running loadPublicationFieldsTest ....");`
			`String xml = IOUtils`
			`.toString(`
			`OrcidNoDoiTest.class.getResourceAsStream("activity_work_0000-0003-2760-1191.xml"));`

			`if (xml == null) {`
			`logger.info("Resource not found");`
			`}`
			`XMLRecordParserNoDoi p = new XMLRecordParserNoDoi();`
			`if (p == null) {`
			`logger.info("XMLRecordParserNoDoi null");`
			`}`
			`WorkDataNoDoi workData = null;`
			`try {`
			`workData = p.VTDParseWorkData(xml.getBytes());`
			`} catch (Exception e) {`
			`logger.error("parsing xml", e);`
			`}`
			`assertNotNull(workData);`
			`assertNotNull(workData.getOid());`
			`logger.info("oid: " + workData.getOid());`
			`assertNotNull(workData.getTitles());`
			`logger.info("titles: ");`
			`workData.getTitles().forEach(t -> {`
			`logger.info(t);`
			`});`
			`logger.info("source: " + workData.getSourceName());`
			`logger.info("type: " + workData.getType());`
			`logger.info("urls: ");`
			`workData.getUrls().forEach(u -> {`
			`logger.info(u);`
			`});`
			`logger.info("publication date: ");`
			`workData.getPublicationDates().forEach(d -> {`
			`logger.info(d.getYear() + " - " + d.getMonth() + " - " + d.getDay());`
			`});`
			`logger.info("external id: ");`
			`workData.getExtIds().removeIf(e -> e.getRelationShip() != null && !e.getRelationShip().equals("self"));`
			`workData.getExtIds().forEach(e -> {`
			`logger.info(e.getType() + " - " + e.getValue() + " - " + e.getRelationShip());`
			`});`
			`logger.info("contributors: ");`
			`workData.getContributors().forEach(c -> {`
			`logger`
			`.info(`
			`c.getName() + " - " + c.getRole() + " - " + c.getSequence());`
			`});`

			`}`

			`private void updateRanks(List<Contributor> contributors) {`
			`boolean seqFound = false;`
			`if (contributors`
			`.stream()`
			`.filter(`
			`c -> c.getRole() != null && c.getSequence() != null &&`
			`c.getRole().equals("author") && (c.getSequence().equals("first") \|\|`
			`c.getSequence().equals("additional")))`
			`.count() > 0) {`
			`seqFound = true;`
			`logger.info("sequence data found");`
			`}`
			`if (!seqFound) {`
			`List<Integer> seqIds = Arrays.asList(0);`
			`contributors.forEach(c -> {`
			`int currentSeq = seqIds.get(0) + 1;`
			`seqIds.set(0, currentSeq);`
			`c.setSequence(Integer.toString(seqIds.get(0)));`
			`});`
			`}`
			`}`

			`private void updateAuthorsSimpleMatch(List<Contributor> contributors, AuthorData author) {`
			`contributors.forEach(c -> {`
			`if (c.isSimpleMatch()) {`
			`logger.info("simple match on : " + c.getCreditName());`
			`c.setName(author.getName());`
			`c.setSurname(author.getSurname());`
			`c.setOid(author.getOid());`
			`}`
			`});`
			`updateRanks(contributors);`
			`}`

			`private void updateAuthorsSimilarityMatch(List<Contributor> contributors, AuthorData author) {`
			`logger.info("inside updateAuthorsSimilarityMatch ...");`
			`contributors.forEach(c -> {`
			`logger`
			`.info(`
			`c.getOid() + " - " + c.getCreditName() + " - " +`
			`c.getName() + " - " + c.getSurname() + " - " +`
			`c.getRole() + " - " + c.getSequence() + " - best: " + c.isBestMatch() + " - simpe: "`
			`+ c.isSimpleMatch());`
			`});`

			`contributors`
			`.stream()`
			`.filter(c -> c.isBestMatch())`
			`.forEach(c -> {`
			`logger.info("similarity match on : " + c.getCreditName());`
			`c.setName(author.getName());`
			`c.setSurname(author.getSurname());`
			`c.setOid(author.getOid());`
			`});`
			`updateRanks(contributors);`
			`}`

			`@Test`
			`@Ignore`
			`public void authorSimilarityMatchTest() throws Exception {`
			`logger.info("running authorSimilarityMatchTest ....");`
			`authorMatchTest("activity_work_0000-0003-2760-1191-similarity.xml");`
			`}`

			`@Test`
			`private void authorSimpleMatchTest() throws Exception {`
			`logger.info("running authorSimpleMatchTest ....");`
			`authorMatchTest("activity_work_0000-0003-2760-1191.xml");`
			`}`

			`private void authorMatchTest(String orcidWork)`
			`throws IOException, XPathEvalException, XPathParseException, NavException, VtdException, ParseException {`
			`AuthorData author = new AuthorData();`
			`author.setName(nameA);`
			`author.setSurname(surnameA);`
			`author.setOid(orcidIdA);`
			`String xml = IOUtils`
			`.toString(`
			`OrcidNoDoiTest.class.getResourceAsStream(orcidWork));`

			`if (xml == null) {`
			`logger.info("Resource not found");`
			`}`
			`XMLRecordParserNoDoi p = new XMLRecordParserNoDoi();`
			`if (p == null) {`
			`logger.info("XMLRecordParserNoDoi null");`
			`}`
			`WorkDataNoDoi workData = null;`
			`try {`
			`workData = p.VTDParseWorkData(xml.getBytes());`
			`} catch (Exception e) {`
			`logger.error("parsing xml", e);`
			`}`
			`assertNotNull(workData);`
			`int matchCounter = 0;`
			`List<Integer> matchCounters = Arrays.asList(matchCounter);`
			`Contributor contributor = null;`
			`workData.getContributors().forEach(c -> {`
			`if (normalize(c.getCreditName()).contains(normalize(author.getName())) \|\|`
			`normalize(c.getCreditName()).contains(normalize(author.getSurname())) \|\|`
			`((author.getOtherName() != null)`
			`&& normalize(c.getCreditName()).contains(normalize(author.getOtherName())))) {`
			`matchCounters.set(0, matchCounters.get(0) + 1);`
			`c.setSimpleMatch(true);`
			`}`
			`});`
			`logger.info("match counter: " + Integer.toString(matchCounters.get(0)));`
			`if (matchCounters.get(0) == 1) {`
			`updateAuthorsSimpleMatch(workData.getContributors(), author);`
			`} else if (matchCounters.get(0) > 1) {`
			`Optional<Contributor> optCon = workData`
			`.getContributors()`
			`.stream()`
			`.filter(c -> c.isSimpleMatch())`
			`.map(c -> {`
			`c.setScore(bestMatch(nameA, surnameA, c.getCreditName()));`
			`logger.debug("nella map: " + c.getCreditName() + " score: " + c.getScore());`
			`return c;`
			`})`
			`.filter(c -> c.getScore() >= threshold)`
			`.max(Comparator.comparing(c -> c.getScore()));`
			`Contributor bestMatchContributor = null;`
			`if (optCon.isPresent()) {`
			`bestMatchContributor = optCon.get();`
			`bestMatchContributor.setBestMatch(true);`
			`logger.info("best match: " + bestMatchContributor.getCreditName());`
			`updateAuthorsSimilarityMatch(workData.getContributors(), author);`
			`}`

			`}`

			`logger.info("UPDATED contributors: ");`
			`workData.getContributors().forEach(c -> {`
			`logger`
			`.info(`
			`c.getOid() + " - " + c.getCreditName() + " - " +`
			`c.getName() + " - " + c.getSurname() + " - " +`
			`c.getRole() + " - " + c.getSequence());`
			`});`
			`}`
			`}`

			`//`
			`// orcid_RDD = sc.textFile(ORCID_DUMP_PATH)`
			`// no_doi_works_RDD = orcid_RDD.map(orcid_map).filter(lambda x:x is not None).map(lambda x: json.dumps(x)).saveAsTextFile(path=ORCID_OPENAIRE_PATH,compressionCodecClass="org.apache.hadoop.io.compress.GzipCodec")`
			`//`