Suffix arrays have been around for a long time , For specific concepts, readers search by themselves , I only know a little bit about it , Not for discussion .

By finding the longest common substring of two strings , Demonstrate the classic application of suffix array .

First of all, we need to explain , The suffix array algorithm implemented by Xiaocai , Not standard , It's just drawing on the ideas .

It's a simple algorithm , There are two versions , The first is space for time , The second is time for space .

Space for time

 /*
Use suffix array to get the longest common substring of two strings
Space for time
@params
s1 String, The string to parse
s2 String, The string to parse
norepeat Boolean, Whether to de duplicate the results , Default true
*/
function commonSubString(s1, s2, norepeat){
var norepeat = norepeat == undefined ? true : norepeat,
array = suffixArray(s1, 1).concat(suffixArray(s2, 2)),
maxLength = 0,
maxStrings = [],
tempLength = 0,
i = 0,
length = 0,
result = {}; // Sort , Sort by string , Direct comparison is enough
array.sort(function(s1, s2){
return (s1.s == s2.s) ? 0 : (s1.s > s2.s) ? 1 : -1;
}); // Find the longest common substring
for(i = 0, length = array.length - 1; i < length; i++){
tempLength = commonLength(array[i].s, array[i+1].s);
if(array[i].g != array[i+1].g){
if(maxLength == tempLength){
maxStrings.push(array[i]);
}
if(maxLength < tempLength){
maxLength = tempLength;
maxStrings = [];
maxStrings.push(array[i]);
}
}
} // Structural results
result.length = maxLength;
result.contents = [];
for(i in maxStrings){
result.contents.push(maxStrings[i].s.substring(0, maxLength));
} // duplicate removal
if(norepeat){
result.contents = norepeatArray(result.contents);
} return result; /*
Gets the suffix array of the string
*/
function suffixArray(s, g){
var array = [],
i = 0,
length = s.length;
for(i = 0; i < length; i++){
array.push({
s: s.substring(i),
g: g // The purpose of grouping is to ensure that the longest common substring comes from two strings respectively
});
} return array;
}
/*
Get the maximum matching length
*/
function commonLength(s1, s2){
var slength = s1.length > s2.length ? s2.length : s1.length,
i = 0; // cycles = Shorter string length
for(i = 0; i < slength; i++){
// Compare bit by bit
if(s1.charAt(i) != s2.charAt(i)){
break;
}
} return i;
} /*
String array de duplication , Does not affect the original array , Returns a new array
*/
function norepeatArray(array){
var _array = array.slice(0),
map = {},
i = 0,
key = ""; // Hash the content as key
for(i in _array){
map[_array[i]] = 1;
} // Extract hash key, Repopulate to array
_array.splice(0, _array.length);
for(key in map){
_array.push(key);
} return _array;
}
}

Time for space

 /*
Use suffix array to get the longest common substring of two strings
Time for space
@params
s1 String, The string to parse
s2 String, The string to parse
norepeat Boolean, Whether to de duplicate the results , Default true
*/
function commonSubStringPro(s1, s2, norepeat){
var norepeat = norepeat == undefined ? true : norepeat,
array = suffixArray(s1, 1).concat(suffixArray(s2, 2)),
maxLength = 0,
maxStrings = [],
tempLength = 0,
i = 0,
length = 0,
result = {}; // Sort , Sort according to the actual content , Can't sort by pointer
array.sort(function(s1, s2){
var ts1 = s1.str.substring(s1.index),
ts2 = s2.str.substring(s2.index);
return (ts1 == ts2) ? 0 : (ts1 > ts2) ? 1 : -1;
}); // Find the longest common substring
for(i = 0, length = array.length - 1; i < length; i++){
tempLength = commonLength(array[i], array[i+1]);
if(array[i].group != array[i+1].group){
if(maxLength == tempLength){
maxStrings.push(array[i]);
}
if(maxLength < tempLength){
maxLength = tempLength;
maxStrings = [];
maxStrings.push(array[i]);
}
}
} // Structural results
result.length = maxLength;
result.contents = [];
for(i in maxStrings){
result.contents.push(maxStrings[i].str.substr(maxStrings[i].index, maxLength));
} // duplicate removal
if(norepeat){
result.contents = norepeatArray(result.contents);
} return result; /*
Gets the suffix array of the string
Save only pointers , There's no real content
*/
function suffixArray(s, g){
var array = [],
i = 0,
length = 0;
for(i = 0, length = s.length; i < length; i++){
array.push({
index: i,
str: s, // This is just a string pointer , No multiple copies will be created
group: g // The purpose of grouping is to ensure that the longest common substring comes from two strings respectively
});
} return array;
}
/*
Get the maximum matching length
*/
function commonLength(s1, s2){
var slength = 0,
i = 0; // cycles = Shorter string length
slength = (s1.str.length - s1.index) > (s2.str.length - s2.index) ? (s2.str.length - s2.index) : (s1.str.length - s1.index);
for(i = 0; i < slength; i++){
// Compare bit by bit
if(s1.str.substr(i + s1.index, 1) != s2.str.substr(i + s2.index, 1)){
break;
}
} return i;
} /*
String array de duplication , Does not affect the original array , Returns a new array
*/
function norepeatArray(array){
var _array = array.slice(0),
map = {},
i = 0,
key = ""; // Hash the content as key
for(i in _array){
map[_array[i]] = 1;
} // Extract hash key, Repopulate to array
_array.splice(0, _array.length);
for(key in map){
_array.push(key);
} return _array;
}
}

Why are there two versions ? Xiaocai originally only wrote the space for time version , This version has low implementation complexity , But there is an obvious drawback , It takes up too much useless memory , When the amount of analysis data is small , Can be perfectly competent , Once the amount of data reaches a certain level , It shows more than just longer execution time , It doesn't work at all , Unless there's enough memory .

Based on the above thinking , It's easy to find that when generating suffix array , There's no need to save the actual string at all , Just record the location information , thus , Lots of strings in memory , All become integer values , When making comparisons , We don't even need to restore strings , Just use the position to intercept a single character , In the end, there's a huge savings in memory , This is the time for space version .

Time for space , It's not just about saving memory , It's a change , It's never possible. It's possible , Now? , No matter how much data there is , Just a small amount of memory , Can support the operation of the program , No matter how long it takes , It's also possible , It won't just crash . Of course , current CPU It's running very fast .

Hope to enlighten the readers , As for the specific code , Not much , The notes are very detailed .

Use the suffix array to find the longest common substring JavaScript More related articles in

  1. hdu 1403 Longest Common Substring( The longest common substring )( The suffix array )

    http://acm.hdu.edu.cn/showproblem.php?pid=1403 Longest Common Substring Time Limit: 8000/4000 MS (Ja ...

  2. HDU 1403 Longest Common Substring( The suffix array , The longest public substring )

    hdu subject poj subject Refer to the   Luo Suiqian's paper < The suffix array —— A powerful tool for handling strings > The question : Find the longest common substring of two sequences Ideas : Suffix array is one of the classic topics ( Template questions ) // The suffix array sa: take s Of n It's a suffix ...

  3. poj2774 Long Long Message Find the longest common substring in suffix array

    Topic link :http://poj.org/problem?id=2774 This is a good introduction to suffix array The question : Here are two strings , Then find the longest continuous common substring of the two strings Two strings solved by suffix array ...

  4. URAL 1517 Freedom of Choice( The suffix array , The longest common string )

    subject Output the longest common string #define maxn 200010 int wa[maxn],wb[maxn],wv[maxn],ws[maxn]; int cmp(int *r,int a,int b, ...

  5. Palindrome--poj 1159( The longest common substring + Scroll numbers )

    http://poj.org/problem?id=1159 The main idea of the topic :   To give you one n   representative n Characters   The second line gives you a string   Make this string palindrome string At least how many characters need to be added analysis :   primary ...

  6. poj2774 The suffix array Find the longest common substring

    Reference:IOI2009 The paper http://www.cnblogs.com/ziyi--caolu/p/3192731.html #include "stdio.h" # ...

  7. Long Long Message (poj2774 Find the longest common substring in suffix array )

    Long Long Message Time Limit: 4000MS   Memory Limit: 131072K Total Submissions: 19206   Accepted: 79 ...

  8. POJ 2774 The suffix array : Find the longest common child

    reflection : Actually easy. Just two strings together . Through a special character , In the middle , Then find the longest common prefix in the suffix array . And then in two different strings , The longest is the longest common substring . Pay attention to is : Use the first string to infer if it's in the same character , Just. ...

  9. poj 1743 After men's eight questions, affix the array to find the longest non overlapping and longest repeating substring

    Musical Theme Time Limit: 1000MS   Memory Limit: 30000K Total Submissions: 14874   Accepted: 5118 De ...

Random recommendation

  1. 85 megacli- see raid Information

    I don't want to revise the article too much , Here, I will tell you the difficulties I encountered in the installation .1. Where to download ? First , According to the path in the article, the corresponding file can't be downloaded , Here we come to http://www.lsi.com Search the website of ...

  2. Alibaba cloud 377 Seconds to complete 100TB Data sorting : Second Samsung Baidu

    Alibaba cloud 377 Seconds to complete 100TB Data sorting : Second Samsung Baidu Today, ,Sort Benchmark Published on the official website 2015 The final result of the ranking competition in . among , Alibaba cloud doesn't need 7 minute (377 second ) It's done. 100TB ...

  3. Organize class notes pl/sql orcale abnormal

      1>>>>> Exception error handling 1 > Predefined exception handling Part of the predefined description ORACLE Exception error the handling of this exception , Just in PL/SQL The exception handling part of the block , Direct reference to the corresponding ...

  4. javascript How to get the absolute distance from the element node to the page

    var div = document.getElementById('div');var p = getPos(div); function getPos(obj) { var pos = {left ...

  5. Eclipse perl Of IDE Environment plug in -EPIC

    Premise :1. Install well perl Environmental Science :ActivePerl( Verification method :cmd Input in perl -v See if there's a reaction ~) 2. install Eclipse 3.0 Above version Optional : install PadWalker package , It's mainly global variable tracking ...

  6. Luogu P3810 On the safety of blooms CDQ Divide and conquer ( Three dimensional partial order )

    good , This is a three-dimensional partial order template problem Of course, it's not that simple ..... First of all, blame rogue : The wretched face of flowers on the street is mercilessly destroyed : Such a nice name #( funny ) Well, after looking at the questions, we find that : This is a three-dimensional partial order . It's just ans No addition ...

  7. MYSQL5.5 Source code installation linux Next

    /* First install the necessary Libraries */ yum -y install gcc* ###### install MYSQL ###### First installation camke One . Support YUM, be yum install -y cmake Two ...

  8. iOS The local push of cannot be deleted. The solution is

    I've been studying Apple Push recently , When testing local push , Find a problem , Once you add a local push notification , When you modify the code , Delete app , When you run it again app, It will still pop up a push on the banner , Nima can't get rid of it , Almost collapsed , open ...

  9. 【luogu P2880 [USACO07JAN] A balanced lineup Balanced Lineup】 Answer key

    Topic link :https://www.luogu.org/problemnew/show/P2880 You forced me to use ST What a watch qaq #include <cstdio> #include < ...

  10. Microsoft Visual Studio 2012 Update 4 RC 3 Offline setup

    Microsoft Visual Studio 2012 Update 4 RC 3 Offline setup * Microsoft website address :* http://www.microsoft.com/en-us/download ...