| /* |
| * Copyright 2010 Google Inc. |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| // Author: sligocki@google.com (Shawn Ligocki) |
| |
| #include "pagespeed/kernel/http/content_type.h" |
| |
| #include <vector> |
| |
| #include "base/logging.h" |
| #include "pagespeed/kernel/base/basictypes.h" |
| #include "pagespeed/kernel/base/string_util.h" |
| |
| namespace net_instaweb { |
| |
| namespace { |
| |
| const ContentType kTypes[] = { |
| // Canonical types: |
| {"text/html", ".html", ContentType::kHtml}, // RFC 2854 |
| {"application/xhtml+xml", ".xhtml", ContentType::kXhtml}, // RFC 3236 |
| {"application/ce-html+xml", ".xhtml", ContentType::kCeHtml}, |
| |
| // RFC 4329 defines application/javascript as canonical for JavaScript. |
| // text/javascript can break firewall gzipping. |
| {"application/javascript", ".js", ContentType::kJavascript}, |
| {"text/css", ".css", ContentType::kCss}, |
| {"text/plain", ".txt", ContentType::kText}, |
| {"text/xml", ".xml", ContentType::kXml}, // RFC 3023 |
| {"image/png", ".png", ContentType::kPng}, |
| {"image/gif", ".gif", ContentType::kGif}, |
| {"image/jpeg", ".jpg", ContentType::kJpeg}, |
| {"application/x-shockwave-flash", ".swf", ContentType::kSwf}, |
| {"image/webp", ".webp", ContentType::kWebp}, |
| // While the official MIME type is image/vnd.microsoft.icon, old IE browsers |
| // will not accept that type, so we use portable image/x-icon as canonical. |
| {"image/x-icon", ".ico", ContentType::kIco}, |
| {"application/javascript", ".json", ContentType::kJson}, |
| {"application/javascript", ".map", ContentType::kSourceMap}, |
| {"application/pdf", ".pdf", ContentType::kPdf}, // RFC 3778 |
| {"application/octet-stream", ".bin", ContentType::kOctetStream }, |
| // SVG is XML. If at some point we start optimizing svg as images, we need to |
| // be very careful not to just lump them in with images for all purposes, to |
| // avoid creating security vulnerabilities. |
| {"image/svg+xml", ".svg", ContentType::kXml}, |
| |
| // Synonyms; Note that the canonical types above are referenced by index |
| // in the named references declared below. The synonyms below are not |
| // index-sensitive. |
| {"application/x-javascript", ".js", ContentType::kJavascript}, |
| {"text/javascript", ".js", ContentType::kJavascript}, |
| {"text/x-javascript", ".js", ContentType::kJavascript}, |
| {"text/ecmascript", ".js", ContentType::kJavascript}, |
| {"text/js", ".js", ContentType::kJavascript}, |
| {"text/jscript", ".js", ContentType::kJavascript}, |
| {"text/x-js", ".js", ContentType::kJavascript}, |
| {"application/ecmascript", ".js", ContentType::kJavascript}, |
| {"application/json", ".json", ContentType::kJson}, |
| {"application/x-json", ".json", ContentType::kJson}, |
| {"image/jpeg", ".jpeg", ContentType::kJpeg}, |
| {"image/jpg", ".jpg", ContentType::kJpeg}, |
| {"image/vnd.microsoft.icon", ".ico", ContentType::kIco}, |
| {"text/html", ".htm", ContentType::kHtml}, |
| {"application/xml", ".xml", ContentType::kXml}, // RFC 3023 |
| |
| {"video/mpeg", ".mpg", ContentType::kVideo}, // RFC 2045 |
| {"video/mp4", ".mp4", ContentType::kVideo}, // RFC 4337 |
| {"video/3gp", ".3gp", ContentType::kVideo}, |
| {"video/x-flv", ".flv", ContentType::kVideo}, |
| {"video/ogg", ".ogg", ContentType::kVideo}, // RFC 5334 |
| {"video/webm", ".webm", ContentType::kVideo}, |
| {"video/x-ms-asf", ".asf", ContentType::kVideo}, |
| {"video/x-ms-wmv", ".wmv", ContentType::kVideo}, |
| {"video/quicktime", ".mov", ContentType::kVideo}, |
| {"video/mpeg4", ".mp4", ContentType::kVideo}, |
| |
| {"audio/mpeg", ".mp3", ContentType::kAudio}, |
| {"audio/ogg", ".ogg", ContentType::kAudio}, |
| {"audio/webm", ".webm", ContentType::kAudio}, |
| {"audio/mp4", ".mp4", ContentType::kAudio}, |
| {"audio/x-mpeg", ".mp3", ContentType::kAudio}, |
| {"audio/x-wav", ".wav", ContentType::kAudio}, |
| {"audio/mp3", ".mp3", ContentType::kAudio}, |
| {"audio/wav", ".wav", ContentType::kAudio}, |
| |
| {"binary/octet-stream", ".bin", ContentType::kOctetStream }, |
| }; |
| const int kNumTypes = arraysize(kTypes); |
| |
| } // namespace |
| |
| const ContentType& kContentTypeHtml = kTypes[0]; |
| const ContentType& kContentTypeXhtml = kTypes[1]; |
| const ContentType& kContentTypeCeHtml = kTypes[2]; |
| |
| const ContentType& kContentTypeJavascript = kTypes[3]; |
| const ContentType& kContentTypeCss = kTypes[4]; |
| const ContentType& kContentTypeText = kTypes[5]; |
| const ContentType& kContentTypeXml = kTypes[6]; |
| |
| const ContentType& kContentTypePng = kTypes[7]; |
| const ContentType& kContentTypeGif = kTypes[8]; |
| const ContentType& kContentTypeJpeg = kTypes[9]; |
| const ContentType& kContentTypeSwf = kTypes[10]; |
| const ContentType& kContentTypeWebp = kTypes[11]; |
| const ContentType& kContentTypeIco = kTypes[12]; |
| |
| const ContentType& kContentTypeJson = kTypes[13]; |
| const ContentType& kContentTypeSourceMap = kTypes[14]; |
| |
| const ContentType& kContentTypePdf = kTypes[15]; |
| |
| const ContentType& kContentTypeBinaryOctetStream = kTypes[16]; |
| |
| int ContentType::MaxProducedExtensionLength() { |
| return 4; // .jpeg or .webp |
| } |
| |
| bool ContentType::IsCss() const { |
| return type_ == kCss; |
| } |
| |
| bool ContentType::IsJs() const { |
| switch (type_) { |
| case kJavascript: |
| case kJson: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| bool ContentType::IsHtmlLike() const { |
| switch (type_) { |
| case kHtml: |
| case kXhtml: |
| case kCeHtml: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| bool ContentType::IsXmlLike() const { |
| switch (type_) { |
| case kXhtml: |
| case kXml: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| bool ContentType::IsFlash() const { |
| switch (type_) { |
| case kSwf: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| bool ContentType::IsImage() const { |
| switch (type_) { |
| case kPng: |
| case kGif: |
| case kJpeg: |
| case kWebp: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| bool ContentType::IsVideo() const { |
| return type_ == kVideo; |
| } |
| |
| bool ContentType::IsAudio() const { |
| return type_ == kAudio; |
| } |
| |
| const ContentType* NameExtensionToContentType(const StringPiece& name) { |
| // Get the name from the extension. |
| StringPiece::size_type ext_pos = name.rfind('.'); |
| if (ext_pos != StringPiece::npos) { |
| StringPiece ext = name.substr(ext_pos); |
| // TODO(jmarantz): convert to a map if the list gets large. |
| for (int i = 0; i < kNumTypes; ++i) { |
| if (StringCaseEqual(ext, kTypes[i].file_extension())) { |
| return &kTypes[i]; |
| } |
| } |
| } |
| return NULL; |
| } |
| |
| const ContentType* MimeTypeToContentType(const StringPiece& mime_type) { |
| // TODO(jmarantz): convert to a map if the list gets large. |
| |
| // The content-type can have a "; charset=...". We are not interested |
| // in that, for the purpose of our ContentType object. |
| // |
| // TODO(jmarantz): we should be grabbing the encoding, however, and |
| // saving it so that when we emit content-type headers for resources, |
| // they include the proper encoding. |
| StringPiece stripped_mime_type; |
| StringPiece::size_type semi_colon = mime_type.find(';'); |
| if (semi_colon == StringPiece::npos) { |
| stripped_mime_type = mime_type; |
| } else { |
| stripped_mime_type = mime_type.substr(0, semi_colon); |
| } |
| |
| for (int i = 0; i < kNumTypes; ++i) { |
| if (StringCaseEqual(stripped_mime_type, kTypes[i].mime_type())) { |
| return &kTypes[i]; |
| } |
| } |
| return NULL; |
| } |
| |
| // TODO(nforman): Have some further indication of whether |
| // content_type_str was just empty or invalid. |
| bool ParseContentType(const StringPiece& content_type_str, |
| GoogleString* mime_type, |
| GoogleString* charset) { |
| StringPiece content_type = content_type_str; |
| // Set default values |
| mime_type->clear(); |
| charset->clear(); |
| |
| if (content_type.empty()) { |
| return false; |
| } |
| |
| // Mime type is in the form: "\w+/\w+ *;(.*;)* *charset *= *\w+" |
| StringPieceVector semi_split; |
| SplitStringPieceToVector(content_type, ";", &semi_split, false); |
| if (semi_split.size() == 0) { |
| return false; |
| } |
| semi_split[0].CopyToString(mime_type); |
| for (int i = 1, n = semi_split.size(); i < n; ++i) { |
| StringPieceVector eq_split; |
| SplitStringPieceToVector(semi_split[i], "=", &eq_split, false); |
| if (eq_split.size() == 2) { |
| TrimWhitespace(&eq_split[0]); |
| if (StringCaseEqual(eq_split[0], "charset")) { |
| TrimWhitespace(&eq_split[1]); |
| eq_split[1].CopyToString(charset); |
| break; |
| } |
| } |
| } |
| |
| return !mime_type->empty() || !charset->empty(); |
| } |
| |
| void MimeTypeListToContentTypeSet( |
| const GoogleString& in, |
| std::set<const ContentType*>* out) { |
| CHECK(out != NULL) << "'out' is a required parameter."; |
| out->clear(); |
| if (in.empty()) { |
| return; |
| } |
| StringPieceVector strings; |
| SplitStringPieceToVector(in, ",", &strings, true /* omit_empty */); |
| for (StringPieceVector::const_iterator i = strings.begin(), e = strings.end(); |
| i != e; ++i) { |
| const ContentType* ct = MimeTypeToContentType(*i); |
| if (ct == NULL) { |
| LOG(WARNING) << "'" << *i << "' is not a recognized mime-type."; |
| } else { |
| VLOG(1) << "Adding '" << *i << "' to the content-type set."; |
| out->insert(ct); |
| } |
| } |
| } |
| |
| bool ContentType::IsCompressible() const { |
| // TODO(jcrowell): Investigate images with exif data as compressible. |
| return IsXmlLike() || IsHtmlLike() || IsJs() || IsCss() || type_ == kText; |
| } |
| |
| bool ContentType::IsLikelyStaticResource() const { |
| switch (type_) { |
| case kCeHtml: |
| case kHtml: |
| case kJson: |
| case kSourceMap: |
| case kOctetStream: |
| case kOther: |
| case kText: |
| case kXhtml: |
| case kXml: |
| return false; |
| case kCss: |
| case kGif: |
| case kIco: |
| case kJavascript: |
| case kJpeg: |
| case kPdf: |
| case kPng: |
| case kSwf: |
| case kVideo: |
| case kAudio: |
| case kWebp: |
| return true; |
| }; |
| LOG(DFATAL) << "Unexpected content type: " << type_; |
| return false; |
| } |
| |
| } // namespace net_instaweb |