// blob: 9e7ddcb69bbe90946baab6b892e1bd728d33110f [file] [log] [blame]
/*
* Copyright 2012 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Author: matterbury@google.com (Matt Atterbury)
#include "pagespeed/kernel/base/charset_util.h"
#include <cstring>
#include "pagespeed/kernel/base/basictypes.h"
#include "pagespeed/kernel/base/gtest.h"
#include "pagespeed/kernel/base/string.h"
#include "pagespeed/kernel/base/string_util.h"
namespace net_instaweb {
namespace {
class CharsetUtilTest : public testing::Test {
public:
CharsetUtilTest() {}
// buffer must point to a large enough char array, and target will be set to
// point to it with the correct length.
void TestCharsetForBom(const StringPiece bom,
const StringPiece contents,
const StringPiece charset) {
GoogleString target = StrCat(bom, contents);
EXPECT_EQ(charset, GetCharsetForBom(target));
}
private:
DISALLOW_COPY_AND_ASSIGN(CharsetUtilTest);
};
// Verifies that kUtf8Bom is a NUL-terminated string of exactly three bytes
// with the values EF BB BF.
TEST_F(CharsetUtilTest, ProperBom) {
  // std::strlen() returns size_t, so the expected length is written as an
  // unsigned literal to avoid a signed/unsigned comparison inside EXPECT_EQ.
  EXPECT_EQ(3U, std::strlen(kUtf8Bom));
  // Each byte is cast to unsigned char before comparing: plain char may be
  // signed, in which case these values would otherwise compare as negative.
  EXPECT_EQ(0xEF, static_cast<unsigned char>(kUtf8Bom[0]));
  EXPECT_EQ(0xBB, static_cast<unsigned char>(kUtf8Bom[1]));
  EXPECT_EQ(0xBF, static_cast<unsigned char>(kUtf8Bom[2]));
}
// Checks that StripUtf8Bom() removes a leading UTF-8 BOM and returns true,
// and that it returns false and leaves the view unchanged otherwise.
TEST_F(CharsetUtilTest, StripUtf8Bom) {
  const StringPiece markup("<!DOCTYPE yadda yadda>");

  // No BOM at all: nothing is stripped and the view still equals the markup.
  GoogleString plain(markup.data(), markup.size());
  StringPiece plain_view(plain);
  EXPECT_FALSE(StripUtf8Bom(&plain_view));
  EXPECT_STREQ(markup, plain_view);

  // UTF-8 BOM prefix: it is stripped, leaving only the markup.
  GoogleString with_utf8_bom = StrCat(kUtf8Bom, markup);
  StringPiece utf8_view(with_utf8_bom);
  EXPECT_TRUE(StripUtf8Bom(&utf8_view));
  EXPECT_STREQ(markup, utf8_view);

  // UTF-16 big-endian BOM prefix: not a UTF-8 BOM, so the view must still
  // hold the BOM followed by the markup.
  GoogleString with_utf16be_bom = StrCat(kUtf16BigEndianBom, markup);
  StringPiece utf16be_view(with_utf16be_bom);
  EXPECT_FALSE(StripUtf8Bom(&utf16be_view));
  EXPECT_STREQ(StrCat(kUtf16BigEndianBom, markup), utf16be_view);
}
// Checks that GetCharsetForBom() returns the matching charset constant for
// each BOM constant, and an empty piece when there is no BOM.
TEST_F(CharsetUtilTest, GetCharsetForBom) {
  const StringPiece markup("<!DOCTYPE yadda yadda>");

  // Each BOM view is built with an explicit length from STATIC_STRLEN rather
  // than relying on NUL termination.
  // NOTE(review): presumably needed because the UTF-32 BOMs contain NUL
  // bytes -- confirm against the constant definitions.
  const StringPiece no_bom;
  const StringPiece utf8_bom(kUtf8Bom, STATIC_STRLEN(kUtf8Bom));
  const StringPiece utf16be_bom(kUtf16BigEndianBom,
                                STATIC_STRLEN(kUtf16BigEndianBom));
  const StringPiece utf16le_bom(kUtf16LittleEndianBom,
                                STATIC_STRLEN(kUtf16LittleEndianBom));
  const StringPiece utf32be_bom(kUtf32BigEndianBom,
                                STATIC_STRLEN(kUtf32BigEndianBom));
  const StringPiece utf32le_bom(kUtf32LittleEndianBom,
                                STATIC_STRLEN(kUtf32LittleEndianBom));

  // Without a BOM the expected charset is an empty piece.
  TestCharsetForBom(no_bom, markup, StringPiece());
  TestCharsetForBom(utf8_bom, markup, kUtf8Charset);
  TestCharsetForBom(utf16be_bom, markup, kUtf16BigEndianCharset);
  TestCharsetForBom(utf16le_bom, markup, kUtf16LittleEndianCharset);
  TestCharsetForBom(utf32be_bom, markup, kUtf32BigEndianCharset);
  TestCharsetForBom(utf32le_bom, markup, kUtf32LittleEndianCharset);
}
} // namespace
} // namespace net_instaweb