A regular expression defines a set of string.
A regex engine matches a pattern against input and returns true/false
for the condition that pattern exists in the input.
I have got a C++ implementation of a regex engine which uses simple constructs of matching the following:
- A single character
- An equal length pattern and input
- A pattern that has
^
and$
- Wildcards (
?
and*
)
It is inspired by https://nickdrane.com/build-your-own-regex/
and https://swtch.com/~rsc/regexp/regexp1.html
.
The code is as following:
#include<iostream>
using namespace std;
bool MatchQuestion(const char * pattern, const char *input);
bool MatchStar(const char *pattern, const char *input);
bool matchOne(char pattern, char input)
{
cout << "matchOne" << "patt=" << pattern << "input=" << input << endl;
if (pattern == '\0')
return true;
if (input == '\0')
return false;
return pattern == input;
}
bool MatchSameLength(const char *pattern, const char* input)
{
if (*pattern == '\0')
return true;
return matchOne(pattern[0], input[0]) && MatchSameLength(pattern+1, input+1);
}
bool MatchAnywhere(const char *pattern, const char *input)
{
//cout << "patt=" << *pattern << "input=" << *input << endl;
if (*pattern == '\0')
return true;
if (*pattern == '$' && *input == '\0')
return true;
if (pattern[1] == '?') {
return MatchQuestion(pattern, input);
}
if (pattern[1] == '*') {
return MatchStar(pattern, input);
}
return matchOne(pattern[0], input[0]) && MatchAnywhere(pattern+1, input+1);
}
bool MatchQuestion(const char * pattern, const char *input)
{
return (
MatchAnywhere(pattern+2, input) ||
(matchOne(*pattern, *input) && MatchAnywhere(pattern+2, input))
);
}
bool MatchStar(const char *pattern, const char *input)
{
return (
MatchAnywhere(pattern+2, input) ||
(matchOne(*pattern, *input) && MatchAnywhere(pattern, input+1))
);
}
bool search(const char *pattern, const char *input )
{
if (*pattern == '^') {
return MatchAnywhere(pattern+1, input);
}
while (*input) {
if (MatchAnywhere(pattern, input))
return true;
++input;
}
return false;
}
int main() {
//cout << MatchSameLength("aab", "aab") << endl;
//cout << MatchAnywhere("^b$", "b") << endl;
cout << search("ax*b", "abc") << endl;
return 1;
}
References
Written with StackEdit.