Skip to content
Advertisements

How a Regex Engine Work?

A regular expression defines a set of string.

A regex engine matches a pattern against input and returns true/false for the condition that pattern exists in the input.
I have got a C++ implementation of a regex engine which uses simple constructs of matching the following:

  1. A single character
  2. An equal length pattern and input
  3. A pattern that has ^ and $
  4. Wildcards (? and *)

It is inspired by https://nickdrane.com/build-your-own-regex/ and https://swtch.com/~rsc/regexp/regexp1.html.

The code is as following:

#include<iostream>

using namespace std;

bool MatchQuestion(const char * pattern, const char *input);
bool MatchStar(const char *pattern, const char *input);

bool matchOne(char pattern, char input)
{
  cout << "matchOne" << "patt=" << pattern << "input=" << input << endl;
 if (pattern == '\0')
     return true;

  if (input == '\0')
      return false;

  return pattern == input;
}

bool MatchSameLength(const char *pattern, const char* input) 
{
    if (*pattern == '\0')
        return true;

    return matchOne(pattern[0], input[0]) && MatchSameLength(pattern+1, input+1);
}

bool MatchAnywhere(const char *pattern, const char *input)
{
    //cout << "patt=" << *pattern << "input=" << *input << endl;
    if (*pattern == '\0')
        return true;

    if (*pattern == '$' && *input == '\0')
        return true;

    if (pattern[1] == '?') {
        return MatchQuestion(pattern, input);
    }

    if (pattern[1] == '*') {
        return MatchStar(pattern, input);
    }

    return matchOne(pattern[0], input[0]) && MatchAnywhere(pattern+1, input+1);
}

bool MatchQuestion(const char * pattern, const char *input)
{
    return (
        MatchAnywhere(pattern+2, input) ||
            (matchOne(*pattern, *input) && MatchAnywhere(pattern+2, input))
            );
}

bool MatchStar(const char *pattern, const char *input)
{
 return (
     MatchAnywhere(pattern+2, input) ||
     (matchOne(*pattern, *input) && MatchAnywhere(pattern, input+1))
 );
}

bool search(const char *pattern, const char *input )
{
    if (*pattern == '^') {
        return MatchAnywhere(pattern+1, input);
    }

    while (*input) {
        if (MatchAnywhere(pattern, input))
            return true;
        ++input;
        }
    return false;    
}

int main() {
    //cout << MatchSameLength("aab", "aab") << endl;
    //cout << MatchAnywhere("^b$", "b") << endl;

    cout << search("ax*b", "abc") << endl;
    return 1;
}
References

Written with StackEdit.

Advertisements

Leave a Reply

Please log in using one of these methods to post your comment:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Google photo

You are commenting using your Google account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s

This site uses Akismet to reduce spam. Learn how your comment data is processed.

%d bloggers like this: