enhanced merge segments algorithm.

git-svn-id: http://svn.research-infrastructures.eu/public/d4science/gcube/trunk/data-analysis/nlphub@165167 82a268e6-3cf1-43bd-a215-b396298e98cf
This commit is contained in:
Erik Perrone 2018-03-16 10:01:37 +00:00
parent b2c98580da
commit 3c689932ed
4 changed files with 87 additions and 112 deletions

View File

@ -0,0 +1,8 @@
log4j.rootLogger=INFO,stdout
log4j.logger.com.endeca=INFO
# Logger for crawl metrics
log4j.logger.com.endeca.itl.web.metrics=INFO
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%p\t%d{ISO8601}\t%r\t%c\t[%t]\t%m%n

View File

@ -102,12 +102,12 @@ $(document).ready(function() {
}, },
onSuccess : function(files, data, xhr) { onSuccess : function(files, data, xhr) {
hideProgress(); hideProgress();
console.log('' + data.language); //console.log('' + data.language);
checkLanguage(data.language); checkLanguage(data.language);
selectAnnotationsByLanguage(); selectAnnotationsByLanguage();
$("#reset-upload").css("display", $("#reset-upload").css("display",
"inline"); "inline");
console.log("files: " + files); //console.log("files: " + files);
$("#file-info").empty(); $("#file-info").empty();
$("#file-info") $("#file-info")
.append( .append(
@ -373,6 +373,7 @@ showProgress = function() {
$(".progress-circular-div").css("left", left); $(".progress-circular-div").css("left", left);
$(".progress-circular-div").css("top", top); $(".progress-circular-div").css("top", top);
$(".hidden-div").css("display", "block"); $(".hidden-div").css("display", "block");
$(".hidden-div").show();
} }
/* /*
@ -380,6 +381,7 @@ showProgress = function() {
*/ */
hideProgress = function() { hideProgress = function() {
$(".hidden-div").css("display", "none"); $(".hidden-div").css("display", "none");
$(".hidden-div").hide();
} }
/* /*
@ -471,7 +473,7 @@ getOutputJson = function(message) {
parameters += "&plink=" + encodeURI(publicLink); parameters += "&plink=" + encodeURI(publicLink);
parameters += "&lang=" + $("#language-select").val(); parameters += "&lang=" + $("#language-select").val();
parameters += "&tobemap=" + encodeURI(tobemap); parameters += "&tobemap=" + encodeURI(tobemap);
console.log(parameters); //console.log(parameters);
$.ajax({ $.ajax({
url : "/nlphub/nlphub-mapper-servlet?" + parameters, url : "/nlphub/nlphub-mapper-servlet?" + parameters,
@ -479,7 +481,7 @@ getOutputJson = function(message) {
async : true, async : true,
success : function(data, stato) { success : function(data, stato) {
hideProgress(); hideProgress();
console.log(data); //console.log(data);
if ((typeof (data.response) != "undefined") if ((typeof (data.response) != "undefined")
&& (data.response.trim().toUpperCase() == "ERROR")) { && (data.response.trim().toUpperCase() == "ERROR")) {
alert("ERROR\n" + data.message); alert("ERROR\n" + data.message);
@ -604,49 +606,46 @@ rewriteText = function(annotation, color) {
var G = enhanceColor(complement.substring(2,4)); var G = enhanceColor(complement.substring(2,4));
var B = enhanceColor(complement.substring(4)); var B = enhanceColor(complement.substring(4));
complement = "#" + R + G + B; complement = "#" + R + G + B;
console.log("-getIndices: start"); showProgress();
var indices = getIndices(annotation); $(".hidden-div").show();
console.log("-getIndices: end"); window.setTimeout(function() {
$("#result-header-right").empty(); var indices = getIndices(annotation);
$("#result-header-right").append("<span style='color:" + color + ";'>" + annotation + "</span> occurs " + indices.length + " times."); $(".hidden-div").hide();
var indexedText = "";
$("#result-header-right").empty();
$("#result-header-right").append("<span style='color:" + color + ";'>" + annotation + "</span> occurs " + indices.length + " times.");
var indexedText = "";
if ((typeof (indices) == 'undefined') || (indices.length == 0)) {
indexedText = resultText;
indexedText = indexedText.replace(/\n/g, "<br>");
indexedText = indexedText.replace(/</g, "&lt;");
indexedText = indexedText.replace(/>/g, "&gt;");
$("#result-text-div").append("<p>" + indexedText + "</p>");
return;
}
var t = 0;
var offset = 0;
for (var i = 0; i < indices.length; i++) {
var index = indices[i];
var start = index[0];
var end = index[1];
indexedText += resultText.substring(t, start);
var colored = "<span style='color:" + color + "; background:"
+ complement + "; font-weight:bold;'>"
+ resultText.substring(start, end) + "</span>";
indexedText += colored;
t = end;
}
if (t < resultText.length)
indexedText += resultText.substring(t);
if ((typeof (indices) == 'undefined') || (indices.length == 0)) {
indexedText = resultText;
indexedText = indexedText.replace(/\n/g, "<br>"); indexedText = indexedText.replace(/\n/g, "<br>");
indexedText = indexedText.replace(/</g, "&lt;");
indexedText = indexedText.replace(/>/g, "&gt;");
$("#result-text-div").append("<p>" + indexedText + "</p>"); $("#result-text-div").append("<p>" + indexedText + "</p>");
return;
} }, 50);
var t = 0;
var offset = 0;
for (var i = 0; i < indices.length; i++) {
var index = indices[i];
var start = index[0];
var end = index[1];
indexedText += resultText.substring(t, start);
/*
offset += countSubstringOccurrencies(resultText.substring(t, start),
"\n");
offset += countSubstringOccurrencies(resultText.substring(t, start),
"\r");
offset += countSubstringOccurrencies(resultText.substring(t, start),
"\t");
start += offset;
end += offset;*/
var colored = "<span style='color:" + color + "; background:"
+ complement + "; font-weight:bold;'>"
+ resultText.substring(start, end) + "</span>";
indexedText += colored;
t = end;
}
if (t < resultText.length)
indexedText += resultText.substring(t);
indexedText = indexedText.replace(/\n/g, "<br>");
$("#result-text-div").append("<p>" + indexedText + "</p>");
} }
/* /*
@ -699,14 +698,12 @@ getIndices = function(annotation) {
* Merge the indices * Merge the indices
*/ */
mergeIndices = function(indices) { mergeIndices = function(indices) {
console.log("--mergeIndices start");
var newIndices = []; var newIndices = [];
//console.log("--mergeIndices: indices.length=" + indices.length); //console.log("--mergeIndices: indices.length=" + indices.length);
if (indices.length == 1) if (indices.length == 1)
newIndices = indices[0]; newIndices = indices[0];
else else
newIndices = mergeAll(indices); newIndices = mergeAll(indices);
console.log("--mergeIndices end");
return newIndices; return newIndices;
} }

View File

@ -43,43 +43,6 @@ compareSegments = function(s1, s2) {
return (s1[0] - s2[0]); return (s1[0] - s2[0]);
} }
cleanSegmentList = function(list) {
var cleaned = [];
for(var i=0; i<list.length; i++) {
if(cleaned.length == 0)
cleaned[cleaned.length] = list[i];
else {
var found = false;
for(var j=0; j<cleaned.length; j++) {
if(compareSegment(cleaned[j], list[i]) == 0) {
if(cleaned[j][0] > list[i][0])
cleaned[j] = list[i];
found = true;
break;
}
if(compareSegment(cleaned[j], list[i]) < 0) {
if(cleaned[j][0] <= list[i][0])
cleaned[j] = [cleaned[j][0], list[i][1]];
else
cleaned[j] = [list[i][0], cleaned[j][1]];
found = true;
}
}
if(!found) cleaned[cleaned.length] = list[i];
}
}
return cleaned;
}
compareSegmentList = function(list1, list2) {
if(list1.length != list2.length)
return false;
for(var i=0; i<list1.length; i++)
if((list1[i][0] != list2[i][0]) || (list1[i][1] != list2[i][1]))
return false;
return true;
}
/** /**
* mergeAll: merge indices * mergeAll: merge indices
* parameters: indices = Array of Array of indices (index = [start, end]) * parameters: indices = Array of Array of indices (index = [start, end])
@ -87,42 +50,46 @@ compareSegmentList = function(list1, list2) {
*/ */
mergeAll = function(indices) { mergeAll = function(indices) {
console.log("---mergeAll indices.length=" + indices.length);
var m = []; var m = [];
for(var i=0; i<indices[0].length; i++) {
for(var j=1; j<indices.length; j++) {
for(var k=0; k<indices[j].length; k++) {
var res = mergeSegment(indices[0][i], indices[j][k]);
for(var u=0; u<res.length; u++)
m[m.length] = res[u];
}
}
}
/*
var end = false;
var counter = 1;
while(!end) {
var old = m.slice();
m = cleanSegmentList(m);
//console.log(m);
end = compareSegmentList(old, m);
counter++;
if(counter == 100)
end = true;
}
return m.sort(compareSegments);*/
// first of all: create a one-dimensional array with all the data
for(var i=0; i<indices.length; i++) {
m = m.concat(indices[i]);
}
//
// second step: sort the array
// for our purposes a segment is 'lower than' another segment if the left value of the segment
// is lower than the left value of the other segment. In other words:
// [a, b] < [c, d] if a < c
//
m = m.sort(compareSegments); m = m.sort(compareSegments);
var m2 = []; var m2 = [];
//
// merging procedure:
// the procedure uses the functions:
// [1] 'compareSegment'.
// when two segments are equal or one is included in the other, compareSegment returns 0
// when two segments intersect, compareSegment returns -1
// when two segments are external to each other (no intersection), compareSegment returns 1
//
// [2] 'mergeSegment'
// returns the "union" of two segments
//
var current = m[0]; var current = m[0];
for(var i=0; i<m.length; i++) { for(var i=0; i<m.length; i++) {
var cfr = compareSegment(current, m[i]); var cfr = compareSegment(current, m[i]);
switch(cfr) { switch(cfr) {
case 0: case 0:
case -1: case -1:
// if the segments are the same or intersect, the result is the merged segment
current = mergeSegment(current, m[i])[0]; current = mergeSegment(current, m[i])[0];
break; break;
default: default:
// if the segments are external, mergeSegment produces two segments: the first is ready to be stored in the output vector,
// the second is to be compared with the others
var s = mergeSegment(current, m[i]); var s = mergeSegment(current, m[i]);
m2[m2.length] = s[0]; m2[m2.length] = s[0];
current = s[1]; current = s[1];
@ -130,7 +97,10 @@ mergeAll = function(indices) {
} }
} }
if((current[0] != m2[m2.length-1][0]) || (current[1] != m2[m2.length-1][1])) if(m2.length == 0) {
m2[0] = current;
}
else if((current[0] != m2[m2.length-1][0]) || (current[1] != m2[m2.length-1][1]))
m2[m2.length] = current; m2[m2.length] = current;
return m2; return m2;
} }

View File

@ -18,33 +18,33 @@ var indices = [
//console.log(newIndices); //console.log(newIndices);
// answer is: [0,19], [21,23] // answer is: [0,19], [21,23]
indices = [ indices1 = [
[[7,19], [21,25]], [[7,19], [21,25]],
[[1,5], [18,22]] [[1,5], [18,22]]
]; ];
//answer is: [1,5], [7,25] //answer is: [1,5], [7,25]
indices = [ indices2 = [
[[7,16], [21,25]], [[7,16], [21,25]],
[[1,5], [18,22]] [[1,5], [18,22]]
]; ];
//answer is: [1,5], [7,16], [18,25] //answer is: [1,5], [7,16], [18,25]
indices = [ indices3 = [
[[7,16], [21,25]], [[7,16], [21,25]],
[[1,5], [18,22]], [[1,5], [18,22]],
[[2,30]] [[2,30]]
]; ];
//answer is: [1,30] //answer is: [1,30]
indices = [ indices4 = [
[[1,4], [5, 8], [21,25]], [[1,4], [5, 8], [21,25]],
[[1,5], [18,22]], [[1,5], [18,22]],
[[0,2], [11,15], [27,31]] [[0,2], [11,15], [27,31]]
]; ];
//answer is: [0,8],[11,15],[18,25],[27,31] //answer is: [0,8],[11,15],[18,25],[27,31]
var indices = indices4;
var mm = mergeAll(indices); var mm = mergeAll(indices);
console.log(mm); console.log(mm);