C++标准库里面的string::rfind和string:find不是用快速匹配算法实现的,效率不是一般的差。
但是由于其逻辑比较简单,减少了函数调用的次数,反而有些时间觉得还是挺快的。
为了提高string::find和string::rfind的效率,我实现了两个类似的函数
StringRFind和StringFind,分别对应
string::rfind和string::find。
使用系统的 QueryPerformanceCounter 进行精确的速度测试,发现我的程序比标准库的快50倍左右。
我的程序的算法是快速匹配算法的简化,采用1级跳跃的方式实现O(N+X*M)的性能,当匹配失败时,如果已经匹配的字符个数没有超过要匹配字符串中首字母的重复距离,即可不用回退,采用下一个字母与要匹配的字符串进行比较。
在实践中还发现,string::find(char)比较string::find(string)慢很多,所以建议不要使用string::find(char)
size_t StringRFind(
IN const string& strContent,
IN const string& strToFind,
IN size_t nStartPos)
{
size_t nToFindLen = strToFind.length();
//no enough charactor to compare
if((nStartPos < nToFindLen) || (nStartPos
>= strContent.length()))
{
return
string::npos;
}
//iterator to begin of the strContent
string::const_iterator
itContentBegin = strContent.begin();
//iterator to last charactor of the strToFind
string::const_iterator
itToFindEnd = strToFind.begin() + nToFindLen - 1;
//iterator to current scan charactor in strToFind
string::const_iterator
itCharInToFind = itToFindEnd;
//iterator to current scan charactor in strContent
string::const_iterator
itCharInContent = itContentBegin + nStartPos;
//matched charactors number
size_t nCharMatched = 0;
//repeat charactor with first charactor in strToFind
size_t nNoRepeatLen = 1;
itCharInToFind --;
while((nNoRepeatLen != nToFindLen)
&& ((*itCharInToFind) !=
(*itToFindEnd)))
{
nNoRepeatLen++;
itCharInToFind --;
}
itCharInToFind = itToFindEnd;
//the for loop will not stop at itCharInContent ==
itContentBegin, because maybe (*itContentBegin) ==
(*itCharInToFind)
for(; nCharMatched != nToFindLen ; itCharInContent--)
{
//compare string
while((*itCharInContent) == (*itCharInToFind))
{
nCharMatched++;
if((itCharInContent == itContentBegin) ||
(nCharMatched == nToFindLen))
{
break;
}
itCharInContent --;
itCharInToFind --;
}
//all charactor match, or no content to search
if((itCharInContent == itContentBegin) || (nCharMatched
== nToFindLen))
{
break;
}
//only nCharMatched charactors match
if(nCharMatched > 0)
{
//move back only when match char num large than
no repeat char in strToFind
if(nCharMatched >
nNoRepeatLen)
{
itCharInContent += nCharMatched;
}else
itCharInContent += 1;
itCharInToFind = itToFindEnd;
nCharMatched = 0;
}
}
//all charactor match
if(nCharMatched == nToFindLen)
{
return itCharInContent - itContentBegin;
}
//string no found
return string::npos;
}
size_t StringFind(
IN const string& strContent,
IN const string& strToFind,
IN size_t nStartPos)
{
size_t nToFindLen = strToFind.length();
//no enough charactor to compare
if(nStartPos + nToFindLen >=
strContent.length())
{
return
string::npos;
}
//iterator to begin of the strContent
string::const_iterator
itContentBegin = strContent.begin();
//iterator to end of the strContent
string::const_iterator
itContentEnd = strContent.begin()+strContent.length()-1;
//iterator to last charactor of the strToFind
//string::const_iterator
itToFindEnd = strToFind.begin() + nToFindLen - 1;
//iterator to first charactor of the strToFind
string::const_iterator
itToFindBegin = strToFind.begin();
//iterator to current scan charactor in strToFind
string::const_iterator
itCharInToFind = itToFindBegin;
//iterator to current scan charactor in strContent
string::const_iterator
itCharInContent = itContentBegin + nStartPos;
//matched charactors number
size_t nCharMatched = 0;
//repeat charactor with first charactor in strToFind
size_t nNoRepeatLen = 1;
itCharInToFind ++;
while((nNoRepeatLen != nToFindLen)
&& ((*itCharInToFind) !=
(*itToFindBegin)))
{
nNoRepeatLen++;
itCharInToFind --;
}
itCharInToFind = itToFindBegin;
//the for loop will not stop at itCharInContent ==
itContentEnd, because maybe (*itContentEnd) ==
(*itCharInToFind)
for(; nCharMatched != nToFindLen ; itCharInContent++)
{
//compare string
while((*itCharInContent) == (*itCharInToFind))
{
nCharMatched++;
if((itCharInContent == itContentEnd) ||
(nCharMatched == nToFindLen))
{
break;
}
itCharInContent ++;
itCharInToFind ++;
}
//all charactor match, or no content to search
if((itCharInContent == itContentEnd) || (nCharMatched
== nToFindLen))
{
break;
}
//only nCharMatched charactors match
if(nCharMatched > 0)
{
//move back only when match char num large than
no repeat char in strToFind
if(nCharMatched >
nNoRepeatLen)
{
itCharInContent -= nCharMatched;
}else
itCharInContent -= 1;
itCharInToFind = itToFindBegin;
nCharMatched = 0;
}
}
//all charactor match
if(nCharMatched == nToFindLen)
{
return itCharInContent - itContentBegin;
}
//string no found
return string::npos;
}