enhanced merge segments algorithm.

git-svn-id: http://svn.research-infrastructures.eu/public/d4science/gcube/trunk/data-analysis/nlphub@165167 82a268e6-3cf1-43bd-a215-b396298e98cf
This commit is contained in:
Erik Perrone 2018-03-16 10:01:37 +00:00
parent b2c98580da
commit 3c689932ed
4 changed files with 87 additions and 112 deletions

View File

@ -0,0 +1,8 @@
log4j.rootLogger=INFO,stdout
log4j.logger.com.endeca=INFO
# Logger for crawl metrics
log4j.logger.com.endeca.itl.web.metrics=INFO
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%p\t%d{ISO8601}\t%r\t%c\t[%t]\t%m%n

View File

@ -102,12 +102,12 @@ $(document).ready(function() {
},
onSuccess : function(files, data, xhr) {
hideProgress();
console.log('' + data.language);
//console.log('' + data.language);
checkLanguage(data.language);
selectAnnotationsByLanguage();
$("#reset-upload").css("display",
"inline");
console.log("files: " + files);
//console.log("files: " + files);
$("#file-info").empty();
$("#file-info")
.append(
@ -373,6 +373,7 @@ showProgress = function() {
$(".progress-circular-div").css("left", left);
$(".progress-circular-div").css("top", top);
$(".hidden-div").css("display", "block");
$(".hidden-div").show();
}
/*
@ -380,6 +381,7 @@ showProgress = function() {
*/
hideProgress = function() {
	// Counterpart of showProgress: conceals every element matching
	// ".hidden-div". The CSS display is forced to "none" first and jQuery's
	// hide() is then applied to the same selection, in that order.
	$(".hidden-div").css("display", "none").hide();
};
/*
@ -471,7 +473,7 @@ getOutputJson = function(message) {
parameters += "&plink=" + encodeURI(publicLink);
parameters += "&lang=" + $("#language-select").val();
parameters += "&tobemap=" + encodeURI(tobemap);
console.log(parameters);
//console.log(parameters);
$.ajax({
url : "/nlphub/nlphub-mapper-servlet?" + parameters,
@ -479,7 +481,7 @@ getOutputJson = function(message) {
async : true,
success : function(data, stato) {
hideProgress();
console.log(data);
//console.log(data);
if ((typeof (data.response) != "undefined")
&& (data.response.trim().toUpperCase() == "ERROR")) {
alert("ERROR\n" + data.message);
@ -604,49 +606,46 @@ rewriteText = function(annotation, color) {
var G = enhanceColor(complement.substring(2,4));
var B = enhanceColor(complement.substring(4));
complement = "#" + R + G + B;
console.log("-getIndices: start");
var indices = getIndices(annotation);
console.log("-getIndices: end");
$("#result-header-right").empty();
$("#result-header-right").append("<span style='color:" + color + ";'>" + annotation + "</span> occurs " + indices.length + " times.");
var indexedText = "";
showProgress();
$(".hidden-div").show();
window.setTimeout(function() {
var indices = getIndices(annotation);
$(".hidden-div").hide();
$("#result-header-right").empty();
$("#result-header-right").append("<span style='color:" + color + ";'>" + annotation + "</span> occurs " + indices.length + " times.");
var indexedText = "";
if ((typeof (indices) == 'undefined') || (indices.length == 0)) {
indexedText = resultText;
indexedText = indexedText.replace(/\n/g, "<br>");
indexedText = indexedText.replace(/</g, "&lt;");
indexedText = indexedText.replace(/>/g, "&gt;");
$("#result-text-div").append("<p>" + indexedText + "</p>");
return;
}
var t = 0;
var offset = 0;
for (var i = 0; i < indices.length; i++) {
var index = indices[i];
var start = index[0];
var end = index[1];
indexedText += resultText.substring(t, start);
var colored = "<span style='color:" + color + "; background:"
+ complement + "; font-weight:bold;'>"
+ resultText.substring(start, end) + "</span>";
indexedText += colored;
t = end;
}
if (t < resultText.length)
indexedText += resultText.substring(t);
if ((typeof (indices) == 'undefined') || (indices.length == 0)) {
indexedText = resultText;
indexedText = indexedText.replace(/\n/g, "<br>");
indexedText = indexedText.replace(/</g, "&lt;");
indexedText = indexedText.replace(/>/g, "&gt;");
$("#result-text-div").append("<p>" + indexedText + "</p>");
return;
}
var t = 0;
var offset = 0;
for (var i = 0; i < indices.length; i++) {
var index = indices[i];
var start = index[0];
var end = index[1];
indexedText += resultText.substring(t, start);
/*
offset += countSubstringOccurrencies(resultText.substring(t, start),
"\n");
offset += countSubstringOccurrencies(resultText.substring(t, start),
"\r");
offset += countSubstringOccurrencies(resultText.substring(t, start),
"\t");
start += offset;
end += offset;*/
var colored = "<span style='color:" + color + "; background:"
+ complement + "; font-weight:bold;'>"
+ resultText.substring(start, end) + "</span>";
indexedText += colored;
t = end;
}
if (t < resultText.length)
indexedText += resultText.substring(t);
indexedText = indexedText.replace(/\n/g, "<br>");
$("#result-text-div").append("<p>" + indexedText + "</p>");
}, 50);
}
/*
@ -699,14 +698,12 @@ getIndices = function(annotation) {
* Merge the indices
*/
mergeIndices = function(indices) {
	// Parameter: indices = Array of segment lists, each list being an Array of
	//            [start, end] pairs.
	// Returns:   a single Array of [start, end] pairs.

	// Nothing to merge: return an empty list instead of handing an empty
	// array to mergeAll, which expects at least one segment to start from.
	if (!indices || indices.length === 0)
		return [];

	// A single list is returned as it is (same array instance, no copy).
	if (indices.length === 1)
		return indices[0];

	// Two or more lists: delegate the actual merging to mergeAll.
	return mergeAll(indices);
};

View File

@ -43,43 +43,6 @@ compareSegments = function(s1, s2) {
return (s1[0] - s2[0]);
}
cleanSegmentList = function(list) {
	// Builds a reduced list out of 'list' (Array of [start, end] pairs): every
	// incoming segment is either folded into segments collected so far or
	// appended as a new entry. Comparison is delegated to compareSegment,
	// which is defined outside this block; only whether its result is 0,
	// negative, or something else matters here.
	var kept = [];
	for (var n = 0; n < list.length; n++) {
		var incoming = list[n];
		var absorbed = false;

		for (var k = 0; k < kept.length; k++) {
			if (compareSegment(kept[k], incoming) == 0) {
				// result 0: keep whichever of the two starts further left,
				// then stop scanning for this incoming segment
				if (kept[k][0] > incoming[0])
					kept[k] = incoming;
				absorbed = true;
				break;
			}
			if (compareSegment(kept[k], incoming) < 0) {
				// negative result: replace the stored entry with a segment that
				// starts at the lower start and ends where the later-starting
				// segment ends; scanning continues with the next stored entry
				kept[k] = (kept[k][0] <= incoming[0])
					? [kept[k][0], incoming[1]]
					: [incoming[0], kept[k][1]];
				absorbed = true;
			}
		}

		// no stored entry matched (or nothing stored yet): add it as new
		if (!absorbed)
			kept.push(incoming);
	}
	return kept;
};
compareSegmentList = function(list1, list2) {
	// Tells whether two segment lists hold the same [start, end] pairs in the
	// same order. Returns true when they do, false otherwise (including when
	// the lists differ in length).
	var count = list1.length;
	if (count != list2.length) {
		return false;
	}
	var pos = 0;
	while (pos < count) {
		var left = list1[pos];
		var right = list2[pos];
		var sameBounds = (left[0] == right[0]) && (left[1] == right[1]);
		if (!sameBounds) {
			return false;
		}
		pos++;
	}
	return true;
};
/**
* mergeAll: merge indices
* parameters: indices = Array of Array of indices (index = [start, end])
@ -87,42 +50,46 @@ compareSegmentList = function(list1, list2) {
*/
mergeAll = function(indices) {
console.log("---mergeAll indices.length=" + indices.length);
var m = [];
for(var i=0; i<indices[0].length; i++) {
for(var j=1; j<indices.length; j++) {
for(var k=0; k<indices[j].length; k++) {
var res = mergeSegment(indices[0][i], indices[j][k]);
for(var u=0; u<res.length; u++)
m[m.length] = res[u];
}
}
}
/*
var end = false;
var counter = 1;
while(!end) {
var old = m.slice();
m = cleanSegmentList(m);
//console.log(m);
end = compareSegmentList(old, m);
counter++;
if(counter == 100)
end = true;
}
return m.sort(compareSegments);*/
// first step: flatten all the segment lists into a one-dimensional array
for(var i=0; i<indices.length; i++) {
m = m.concat(indices[i]);
}
//
// second step: sort the array
// for our purposes a segment is 'lower than' another segment if the left value of the segment
// is lower than the left value of the other segment. In other words:
// [a, b] < [c, d] if a < c
//
m = m.sort(compareSegments);
var m2 = [];
//
// merging procedure:
// the procedure uses two functions:
// [1] 'compareSegment'
// when two segments are equal, or one is included in the other, compareSegment returns 0
// when two segments intersect, compareSegment returns -1
// when two segments are external (no intersection), compareSegment returns 1
//
// [2] 'mergeSegment'
// returns the "union" of two segments
//
var current = m[0];
for(var i=0; i<m.length; i++) {
var cfr = compareSegment(current, m[i]);
switch(cfr) {
case 0:
case -1:
// if the segments are equal, included or intersecting, the result is the single merged segment
current = mergeSegment(current, m[i])[0];
break;
default:
// if the segments are external, mergeSegment produces two segments: the first is ready to be stored in the output array,
// the second still has to be compared with the remaining segments
var s = mergeSegment(current, m[i]);
m2[m2.length] = s[0];
current = s[1];
@ -130,7 +97,10 @@ mergeAll = function(indices) {
}
}
if((current[0] != m2[m2.length-1][0]) || (current[1] != m2[m2.length-1][1]))
if(m2.length == 0) {
m2[0] = current;
}
else if((current[0] != m2[m2.length-1][0]) || (current[1] != m2[m2.length-1][1]))
m2[m2.length] = current;
return m2;
}

View File

@ -18,33 +18,33 @@ var indices = [
//console.log(newIndices);
// answer is: [0,19], [21,23]
indices = [
indices1 = [
[[7,19], [21,25]],
[[1,5], [18,22]]
];
//answer is: [1,5], [7,25]
indices = [
indices2 = [
[[7,16], [21,25]],
[[1,5], [18,22]]
];
//answer is: [1,5], [7,16], [18,25]
indices = [
indices3 = [
[[7,16], [21,25]],
[[1,5], [18,22]],
[[2,30]]
];
//answer is: [1,30]
indices = [
indices4 = [
[[1,4], [5, 8], [21,25]],
[[1,5], [18,22]],
[[0,2], [11,15], [27,31]]
];
//answer is: [0,8],[11,15],[18,25],[27,31]
var indices = indices4;
var mm = mergeAll(indices);
console.log(mm);