package eu.dnetlib.data.collector.plugins.schemaorg;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.json.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.net.URL;
import java.nio.charset.Charset;
import java.util.Iterator;

/**
 * Iterator adapter that turns a stream of endpoint URLs into schema.org Dataset records.
 *
 * For every URL supplied by the wrapped iterator the page is downloaded, its
 * {@code <script type="application/ld+json">} elements are inspected, and the first
 * JSON object whose {@code @context} starts with {@code http(s)://schema.org} and whose
 * {@code @type} equals {@code dataset} (case-insensitively) is returned.
 *
 * {@link #next()} may return {@code null}: callers must be prepared for endpoints that
 * are {@code null}, cannot be downloaded/parsed, or carry no Dataset document.
 */
public class EndpointAccessIterator implements Iterator<JSONObject> {
	private static final Log log = LogFactory.getLog(EndpointAccessIterator.class);

	private static final String JSON_LD_MIME_TYPE = "application/ld+json";
	private static final String SCHEMA_ORG_HTTP = "http://schema.org";
	private static final String SCHEMA_ORG_HTTPS = "https://schema.org";
	private static final String DATASET_TYPE = "dataset";

	/**
	 * Download options. Only the charset is configurable; it is handed as-is to
	 * {@code Utils.RemoteAccessWithRetry} and is {@code null} unless explicitly set.
	 */
	public static class Options {

		private Charset charset;

		/** Creates options with no charset set. */
		public Options(){}

		/**
		 * @param charset charset passed on to the download helper
		 */
		public Options(Charset charset) {
			this.charset = charset;
		}

		/**
		 * @return the configured charset, or {@code null} when none was set
		 */
		public Charset getCharset() {
			return charset;
		}

		/**
		 * @param charset charset passed on to the download helper
		 */
		public void setCharset(Charset charset) {
			this.charset = charset;
		}
	}

	private final Options options;
	private final Iterator<String> repositoryIterator;

	/**
	 * @param options            download options; its charset is read on every {@link #next()} call
	 * @param repositoryIterator source of endpoint URLs, consumed one element per {@link #next()} call
	 */
	public EndpointAccessIterator(Options options, Iterator<String> repositoryIterator) {
		this.options = options;
		this.repositoryIterator = repositoryIterator;
	}

	/**
	 * @return whether the wrapped endpoint iterator has more elements
	 */
	@Override
	public boolean hasNext() {
		return this.repositoryIterator.hasNext();
	}

	/**
	 * Consumes the next endpoint URL and extracts its Dataset document.
	 *
	 * @return the Dataset JSON object, or {@code null} when the endpoint is {@code null},
	 *         processing failed, or no matching document was found
	 */
	@Override
	public JSONObject next() {
		String endpoint = this.repositoryIterator.next();
		if (endpoint == null) {
			return null;
		}

		log.debug(String.format("processing: %s", endpoint));

		return this.extractDatasetRecord(endpoint);
	}

	/**
	 * Downloads {@code endpoint} and returns the first embedded schema.org Dataset JSON-LD object.
	 *
	 * Any failure while downloading or parsing the HTML is logged and results in {@code null}.
	 * A failure to parse a single JSON-LD script only skips that script, so that a later,
	 * well-formed Dataset script on the same page is still found.
	 *
	 * @param endpoint URL of the page to inspect
	 * @return the Dataset document, or {@code null} if none could be extracted
	 */
	private JSONObject extractDatasetRecord(String endpoint) {
		JSONObject datasetDocument = null;
		try {
			URL urlEndpoint = new URL(endpoint);
			log.debug("downloading endpoint "+urlEndpoint);
			// NOTE(review): 3 and 5000 are presumably retry count and delay; confirm against Utils.
			String payload = Utils.RemoteAccessWithRetry(3, 5000, urlEndpoint, this.options.getCharset());

			log.trace("downloaded payload id: "+payload);
			Document doc = Jsoup.parse(payload);
			Elements scriptTags = doc.getElementsByTag("script");
			for (Element scriptTag : scriptTags) {
				if (!isJsonLdScript(scriptTag)) {
					continue;
				}

				JSONObject schemaItem = parseJsonObject(scriptTag.data(), endpoint);
				if (schemaItem == null || !isSchemaOrgDataset(schemaItem)) {
					continue;
				}

				log.debug(String.format("discovered dataset document: %s", schemaItem.toString()));

				datasetDocument = schemaItem;
				break;
			}
		} catch (Exception ex) {
			log.error("problem extracting dataset document. returning empty", ex);
			datasetDocument = null;
		}
		if (datasetDocument == null) {
			log.debug("did not find any dataset document in endpoint");
		} else {
			log.debug("found dataset document in endpoint :"+datasetDocument.toString());
		}
		return datasetDocument;
	}

	/** Tells whether the script element declares the JSON-LD MIME type (case-insensitive). */
	private static boolean isJsonLdScript(Element scriptTag) {
		return scriptTag.hasAttr("type") && JSON_LD_MIME_TYPE.equalsIgnoreCase(scriptTag.attr("type"));
	}

	/**
	 * Parses {@code data} as a JSON object, returning {@code null} instead of propagating
	 * the failure when the script content is not a JSON object (malformed or array-rooted).
	 */
	private static JSONObject parseJsonObject(String data, String endpoint) {
		try {
			return new JSONObject(data);
		} catch (Exception ex) {
			// Broad catch on purpose: compiles whether the bundled org.json declares
			// JSONException as checked or unchecked, without needing a new import.
			log.warn("skipping unparsable application/ld+json script in endpoint "+endpoint, ex);
			return null;
		}
	}

	/**
	 * Checks the {@code @context} / {@code @type} pair of a JSON-LD item.
	 * Missing keys yield empty strings from {@code optString} and therefore do not match.
	 */
	private static boolean isSchemaOrgDataset(JSONObject schemaItem) {
		String context = schemaItem.optString("@context");
		String type = schemaItem.optString("@type");

		boolean isSchemaOrgContext = startsWithIgnoreCase(context, SCHEMA_ORG_HTTP)
				|| startsWithIgnoreCase(context, SCHEMA_ORG_HTTPS);
		return isSchemaOrgContext && DATASET_TYPE.equalsIgnoreCase(type);
	}

	/** Locale-independent, case-insensitive prefix test (avoids default-locale toLowerCase()). */
	private static boolean startsWithIgnoreCase(String value, String prefix) {
		return value.regionMatches(true, 0, prefix, 0, prefix.length());
	}
}