leetcode 10 Regular Expression Matching(简单正则表达式匹配
最近代码写的少了,而leetcode一直想做一个python,c/c++解题报告的专题,c/c++一直是我非常喜欢的,c语言编程练习的重要性体现在linux内核编程以及一些大公司算法上机的要求,python主要为了后序转型数据分析和机器学习,所以今天来做一个难度为hard 的简单正则表达式匹配。 做了很多leetcode题目,我们来总结一下套路: 当编程成为一种解决问题的习惯,我们就成为了一名纯粹的程序员 leetcode 10 Regular Expression Matching(简单正则表达式匹配) 题目描述
题目意义及背景It might seem deceptively easy even you know the general idea,but programming it correctly with all the details require careful thought. 在程序设计中,规划所有的细节问题都需要认真思考。 题目分析以及需要注意的问题为什么aab可以匹配模式c*a*b呢?● It is a regular expression.Not a wild card.So the ” * ” does not mean any string.And the cab should be split like this “c * a * b” which means N “c”,N “a” and One “b”. ’ * ’ Matches zero or more of the preceding element,so ” c* ” could match nothing. 此题不能使用贪心法考虑情况: s = “ac”,p = “ab*c” After the first ‘a’,we see that there is no b’s to skip for “b*”. We match the last ‘c’ on both side and conclude that they both match. It seems that being greedy is good. But how about this case? s = “abbc”,p = “ab*bbc” 可能还有人说,如果碰到这种情况可以先看一下*后面的内容,但是碰见下面的情况就不好办了。 s = “abcbcd”,p = “a.*c.*d” 所以: Hints: A sample diagram of a deterministic finite state automata (DFA). DFAs are useful for doing lexical analysis and pattern matching. An example is UNIX’s grep tool. Please note that this post does not attempt to descibe a solution using DFA. 什么是DFA? Solution主要解决方案是回溯法,使用递归或者dp We need some kind of backtracking mechanism (回溯法)such that when a matching fails,we return to the last successful matching state and attempt to match more characters in s with ‘*’. This approach leads naturally to recursion. The recursion mainly breaks down elegantly to the following two cases: 主要考虑两种递归情况 Below is the extremely concise code (Excluding comments and asserts,it’s about 10 lines of code). 解题过程如下:
bool isMatch(const char *s,const char *p)
{
//判断参数合法,以及程序正常结束
assert( s && p);
if(*p == ' ') return *s == ' ';
//next char is not '*'; must match current character
if(*(p+1) != '*')
{
assert(*p != '*');//考虑情况isMatch('aa','a*');
return ((*p == *s) ||(*p == '.' && *s != ' ')) && isMatch(s + 1,p + 1);
}
//next char is '*' 继续递归匹配,不能写成*(p+1) == '*' 考虑情况isMatch('ab','.*c')
while((*p == *s)|| (*p == '.' && *s != ' '))
{
if (isMatch(s,p+2)) return true;
s++;
}
//匹配下一个模式
return isMatch(s,p+2);
}
此代码运行时间:18ms Further Thoughts:
leetcode的 解题报告提醒我们说: leetcode的解答报告中说的If you are stuck,recursion is your friend. // 递归版,时间复杂度O(n),空间复杂度O(1) class Solution {
public:
bool isMatch(const char *s,const char *p)
{
if (*p == ' ') return *s == ' ';
// next char is not '*',then must match current character
if (*(p + 1) != '*')
{
if (*p == *s || (*p == '.' && *s != ' '))
return isMatch(s + 1,p + 1);
else
return false;
}
else
{ // next char is '*'
while (*p == *s || (*p == '.' && *s != ' '))
{
if (isMatch(s,p + 2))
return true;
s++;
}
return isMatch(s,p + 2);
}
}
};
c++解决方案:My concise recursive and DP solutions with full explanation in C++ class Solution {
public:
bool isMatch(string s,string p) {
if (p.empty()) return s.empty();
if ('*' == p[1])
// x* matches empty string or at least one character: x* -> xx*
// *s is to ensure s is non-empty
return (isMatch(s,p.substr(2)) || !s.empty() && (s[0] == p[0] || '.' == p[0]) && isMatch(s.substr(1),p));
else
return !s.empty() && (s[0] == p[0] || '.' == p[0]) && isMatch(s.substr(1),p.substr(1));
}
};
class Solution {
public:
bool isMatch(string s,string p) {
/** * f[i][j]: if s[0..i-1] matches p[0..j-1] * if p[j - 1] != '*' * f[i][j] = f[i - 1][j - 1] && s[i - 1] == p[j - 1] * if p[j - 1] == '*',denote p[j - 2] with x * f[i][j] is true iff any of the following is true * 1) "x*" repeats 0 time and matches empty: f[i][j - 2] * 2) "x*" repeats >= 1 times and matches "x*x": s[i - 1] == x && f[i - 1][j] * '.' matches any single character */
int m = s.size(),n = p.size();
vector<vector<bool>> f(m + 1,vector<bool>(n + 1,false));
f[0][0] = true;
for (int i = 1; i <= m; i++)
f[i][0] = false;
// p[0..,j - 3,j - 2,j - 1] matches empty iff p[j - 1] is '*' and p[0..j - 3] matches empty
for (int j = 1; j <= n; j++)
f[0][j] = j > 1 && '*' == p[j - 1] && f[0][j - 2];
for (int i = 1; i <= m; i++)
for (int j = 1; j <= n; j++)
if (p[j - 1] != '*')
f[i][j] = f[i - 1][j - 1] && (s[i - 1] == p[j - 1] || '.' == p[j - 1]);
else
// p[0] cannot be '*' so no need to check "j > 1" here
f[i][j] = f[i][j - 2] || (s[i - 1] == p[j - 2] || '.' == p[j - 2]) && f[i - 1][j];
return f[m][n];
}
};
The shortest AC code. 1.’.’ is easy to handle. if p has a ‘.’,it can pass any single character in s except ‘ ’. class Solution {
public:
bool matchFirst(const char *s,const char *p){
return (*p == *s || (*p == '.' && *s != ' '));
}
bool isMatch(const char *s,const char *p) {
if (*p == ' ') return *s == ' '; //empty
if (*(p + 1) != '*') {//without *
if(!matchFirst(s,p)) return false;
return isMatch(s + 1,p + 1);
} else { //next: with a *
if(isMatch(s,p + 2)) return true; //try the length of 0
while ( matchFirst(s,p) ) //try all possible lengths
if (isMatch(++s,p + 2))return true;
}
}
};
a shorter one (14 lines of code) with neatly format: class Solution {
public:
bool isMatch(const char *s,const char *p) {
for( char c = *p; c != 0; ++s,c = *p ) {
if( *(p+1) != '*' )
p++;
else if( isMatch( s,p+2 ) )
return true;
if( (*s==0) || ((c!='.') && (c!=*s)) )
return false;
}
return *s == 0;
}
};
9-lines 16ms C++ DP Solutions with Explanations
class Solution {
public:
bool isMatch(string s,string p) {
int m = s.length(),n = p.length();
vector<vector<bool> > dp(m + 1,vector<bool> (n + 1,false));
dp[0][0] = true;
for (int i = 0; i <= m; i++)
for (int j = 1; j <= n; j++)
if (p[j - 1] == '*')
dp[i][j] = dp[i][j - 2] || (i > 0 && (s[i - 1] == p[j - 2] || p[j - 2] == '.') && dp[i - 1][j]);
else dp[i][j] = i > 0 && dp[i - 1][j - 1] && (s[i - 1] == p[j - 1] || p[j - 1] == '.');
return dp[m][n];
}
};
2 years agoreply quote python解决方案使用re库,叼炸天! import re
class Solution:
# @return a boolean
def isMatch(self,s,p):
return re.match('^' + p + '$',s) != None
# debug
s = Solution()
print (s.isMatch("aaa",".*"))
Python DP solution in 36 ms def isMatch(self,p):
m = len(s)
n = len(p)
dp = [[True] + [False] * m]
for i in xrange(n):
dp.append([False]*(m+1))
for i in xrange(1,n + 1):
x = p[i-1]
if x == '*' and i > 1:
dp[i][0] = dp[i-2][0]
for j in xrange(1,m+1):
if x == '*':
dp[i][j] = dp[i-2][j] or dp[i-1][j] or (dp[i-1][j-1] and p[i-2] == s[j-1]) or (dp[i][j-1] and p[i-2]=='.')
elif x == '.' or x == s[j-1]:
dp[i][j] = dp[i-1][j-1]
return dp[n][m]
about a year agoreply quote
class Solution(object):
def isMatch(self,p,memo={("",""):True}):
if not p and s: return False
if not s and p: return set(p[1::2]) == {"*"} and not (len(p) % 2)
if (s,p) in memo: return memo[s,p]
char,exp,prev = s[-1],p[-1],0 if len(p) < 2 else p[-2]
memo[s,p] =
(exp == '*' and ((prev in {char,'.'} and self.isMatch(s[:-1],memo)) or self.isMatch(s,p[:-2],memo)))
or
(exp in {char,p[:-1],memo))
return memo[s,p]
# 445 / 445 test cases passed.
# Status: Accepted
# Runtime: 72 ms
8ms backtracking solution C++ //regular expression matching
//first solution: using recursive version
class Solution {
public:
bool isMatch(string s,string p) {
int m = s.length(),n = p.length();
return backtracking(s,m,n);
}
bool backtracking(string& s,int i,string& p,int j) {
if (i == 0 && j == 0) return true;
if (i != 0 && j == 0) return false;
if (i == 0 && j != 0) {
//in this case only p == "c*c*c*" this pattern can match null string
if (p[j-1] == '*') {
return backtracking(s,i,j-2);
}
return false;
}
//now both i and j are not null
if (s[i-1] == p[j-1] || p[j-1] == '.') {
return backtracking(s,i - 1,j - 1);
} else if (p[j-1] == '*') {
//two cases: determines on whether p[j-2] == s[i-1]
//first p[j-2]* matches zero characters of p
if (backtracking(s,j - 2)) return true;
//second consider whether p[j-2] == s[i-1],if true,then s[i-1] is matched,move to backtracking(i - 1,j)
if (p[j-2] == s[i-1] || p[j-2] == '.') {
return backtracking(s,j);
}
return false;
}
return false;
}
};
c语言参考解决方案: bool isMatch(char *s,char *p){
int i;
int ls = strlen(s);
int lp = strlen(p);
bool* m = malloc((ls + 1) * sizeof(bool));
// init
m[0] = true;
for (i = 1; i <= ls; i++) {
m[i] = false;
}
int ip;
for (ip = 0; ip < lp; ip++) {
if (ip + 1 < lp && p[ip + 1] == '*') {
// do nothing
}
else if (p[ip] == '*') {
char c = p[ip - 1];
for (i = 1; i <= ls; i++) {
m[i] = m[i] || (m[i - 1] && (s[i - 1] == c || c == '.'));
}
}
else {
char c = p[ip];
for (i = ls; i > 0; i--) {
m[i] = m[i - 1] && (s[i - 1] == c || c == '.');
}
m[0] = false;
}
}
bool ret = m[ls];
free(m);
return ret;
}
简短的代码: bool isMatch(char* s,char* p) {
while (*s) {
if (*p&&*(p+1)=='*') {
if (!(*p==*s||*p=='.')) {p+=2;continue;}
if (!isMatch(s,p+2)) {s++;continue;} else return true;
}
if (*p==*s||*p=='.') {s++;p++;continue;}
return false;
}
while(*p&&*(p+1)=='*') p+=2;
return !*p;
}
参考文献http://articles.leetcode.com/regular-expression-matching (编辑:李大同) 【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时与联系站长删除相关内容! |