blob: 0c0f77629c059164e97c9590d429af4df0fb416a [file] [log] [blame]
/**
* Copyright 2010 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Author: jmarantz@google.com (Joshua Marantz)
#include "net/instaweb/util/public/wildcard.h"
#include "net/instaweb/util/public/string_util.h"
namespace net_instaweb {
const char Wildcard::kMatchAny = '*';
const char Wildcard::kMatchOne = '?';
Wildcard::Wildcard(const StringPiece& wildcard_spec)
: storage_(wildcard_spec.data(), wildcard_spec.size()) {
InitFromStorage();
}
void Wildcard::InitFromStorage() {
// Pre-scan the wildcard spec into an array of StringPieces. We will
// copy the original string spec into storage_ but that will only be
// backing-store for the StringPieces.
//
// E.g. we will transform "a?c*def**?xyz?" into
// {"a", "?", "c", "*", "def", "*", "?", "xyz", "?"}
// Note that multiple consecutive '*' will be collapsed into one.
//
// TODO(jmarantz): jmaessen suggests moving ? ahead of * as it will
// reduce code complexity and the cost of backtracking.
int num_pieces = 0;
char prev_c = '\0';
for (int i = 0, n = storage_.size(); i < n; ++i) {
char c = storage_[i];
bool add_to_previous = (num_pieces != 0);
bool skip = false;
if (add_to_previous) {
if (c == kMatchAny) {
add_to_previous = false;
skip = (prev_c == kMatchAny); // multiple * in a row means nothing
} else if (c == kMatchOne) {
add_to_previous = false;
} else {
add_to_previous = ((prev_c != kMatchAny) && (prev_c != kMatchOne));
}
}
if (add_to_previous) {
pieces_[num_pieces - 1] = StringPiece(pieces_[num_pieces - 1].data(),
pieces_[num_pieces - 1].size() + 1);
} else if (!skip) {
pieces_.push_back(StringPiece(storage_.data() + i, 1));
++num_pieces;
}
prev_c = c;
}
}
bool Wildcard::MatchHelper(int piece_index, const StringPiece& str) {
bool prev_was_any = false;
size_t num_skips_after_any = 0;
size_t str_index = 0;
// Walk through the pieces parsed out in the constructor, matching them
// against str. Our algorithm will walk linealy through str, although we
// may need to backtrack if there are multiple matches for the segment
// following a *.
for (int num_pieces = pieces_.size(); piece_index < num_pieces; ++piece_index) {
StringPiece piece = pieces_[piece_index];
if (piece[0] == kMatchAny) {
prev_was_any = true;
} else if (piece[0] == kMatchOne) {
if (prev_was_any) {
++num_skips_after_any;
} else {
++str_index;
if (str_index > str.size()) {
return false;
}
}
} else if (prev_was_any) {
str_index += num_skips_after_any;
if (str_index > str.size()) {
return false;
}
// Now we have the unenviable task of figuring out how many
// characters of 'str' to swallow. Consider this complexity.
// CHECK(Wildcard("*abcd?").Match("abcabcdabcdabcdabcde"));
// If we greedily match the "a" in the wildcard against any of
// the first 4 'a's in the string then we are screwed -- we
// won't find the d. Even if we match against the first or
// second "abcd" we will get a failure because we will have
// string left, but no more pattern.
//
// There are probably more efficient ways to do this, such as in
// http://code.google.com/p/re2/, but we will, for short-term
// expediency, use recursion to search all the possible matches
// for the current piece in str.
while ((str_index = str.find(piece, str_index)) != StringPiece::npos) {
if (MatchHelper(piece_index + 1,
str.substr(str_index + piece.size()))) {
return true;
}
++str_index;
}
return false;
} else if ((str.size() - str_index) < piece.size()) {
return false;
} else if (str.substr(str_index, piece.size()) == piece) {
str_index += piece.size();
} else {
return false;
}
}
if (prev_was_any) {
return (str.size() >= num_skips_after_any);
}
return (str_index == str.size());
}
bool Wildcard::IsSimple() const {
if (pieces_.size() == 0) {
return true;
}
if (pieces_.size() != 1) {
return false;
}
StringPiece piece = pieces_[0];
CHECK(!piece.empty());
char ch = piece[0];
return ((ch != kMatchAny) && (ch != kMatchOne));
}
Wildcard* Wildcard::Duplicate() const {
return new Wildcard(storage_);
}
} // namespace net_instaweb