C语言实现KMP模式匹配算法

C语言实现KMP模式匹配算法

C语言实现KMP模式匹配算法

next:

/*!
 * Description:
 * author scictor <scictor@gmail.com>
 * date 2018/7/4
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// https://tekmarathon.com/2013/05/14/algorithm-to-find-substring-in-a-string-kmp-algorithm/
/*What is Partial Match Table?
It is an array of size (pattern_length+1) where, for each position of i in pattern p, b[i] is defined such that it takes the ‘length of the longest proper suffix of P[1…i-1]’ that matches with the ‘prefix of P’.
What is longest prefix/suffix match??? Proper prefix is a prefix which is not same as the substring. Recall proper set which is a subset of a set but is not same as the set.
Why a prefix should match suffix of the pattern? its because when we shift the pattern its the prefix of P which comes towards the suffix. And also the key idea is that if we have successfully matched prefix P[1…i-1] of the pattern with the substring T[j-(i-1)…j-1] of the input string and P(i)!=T(j), then we dont need to reprocess any of the suffix T[j-(i-1)…j-1] since we know this portion of the text string is the prefix of the pattern that we have just matched.
*/
//"ababacb"
/**
 * Pre processes the pattern array based on proper prefixes and proper
 * suffixes at every position of the array
 *
 * @param ptrn
 *            word that is to be searched in the search string
 * @return partial match table which indicates
 */
void kmp_next(const char *pattern, int patternLen, int *next) {
    int i = 0, j = -1;

    next[i] = j; // default next[0] = -1
    while (i < patternLen) {
        while (j >= 0 && pattern[i] != pattern[j]) {
            // if there is mismatch consider the next widest border
            // The borders to be examined are obtained in decreasing order from
            //  the values b[i], b[b[i]] etc.
            j = next[j];
        }
        i++;
        j++;
        next[i] = j;
    }
    for(int index = 0; index < patternLen; ++index) printf("%d ", next[index]);
    return;
}

/**
     * Based on the pre processed array, search for the pattern in the text
     *
     * @param text
     *            text over which search happens
     * @param ptrn
     *            pattern that is to be searched
     */

//int matchIndex[128] = {0};

int kmp_search(const char *text, int textLen, const char *pattern, int patternLen) {
    int i = 0, j = 0;

    // initialize new array and preprocess the pattern
    int next[patternLen + 1];
    memset(next, 0x00, sizeof(next));

//    int idx = 0;
//    memset(matchIndex, 0x00, sizeof(matchIndex));

    kmp_next(pattern, patternLen, next);

    while (i < textLen) {
        while (j >= 0 && text[i] != pattern[j]) {
            j = next[j];
        }
        i++;
        j++;

        // a match is found
        //        if (j == patternLen) {
        //            printf("found substring at index:" + (i - patternLen));
        //            j = next[j];
        //        }
        if (j == patternLen) {
            printf("found substring at index:%d", (i - patternLen));
            //j = next[j];
            //matchIndex[idx++] = i - patternLen;
            return (i - patternLen);
        }
    }

//    for(int k = 0; k < idx; ++k)
//    {
//        printf("%d ", matchIndex[k]);
//    }
    return -1;
}

/*
Index         0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16  17  18  19  20  21  22  23
Text(T)       a b c a b d a b c a b  d  a  b  c  a  b   d   a   b   d   a   b   c
Pattern(P)    a b c a b d a b c
PMT (next[i])   -1 0 0 0 1 2 0 1 2 3
 */
// 4ms
int kmp(const char *text, int textLen, const char *pattern, int patternLen)
{
    int *T;
    int i, j;

    if (pattern[0] == '