src/third_party/closure_library/closure/goog/net/filedownloader.js - incubator-pagespeed-debian - Git at Google

 // Copyright 2011 The Closure Library Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //      http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS-IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 /**
  * @fileoverview A class for downloading remote files and storing them
  * locally using the HTML5 FileSystem API.
  *
  * The directory structure is of the form /HASH/URL/BASENAME:
  *
  * The HASH portion is a three-character slice of the hash of the URL. Three
  * hex digits give at most 16^3 = 4096 such directories, which stays below the
  * filesystem limit of about 5000 files per directory. This should divide the
  * downloads roughly evenly among those directories, thus allowing for on the
  * order of 4096 * 5000 downloads.
  *
  * The URL portion is the (sanitized) full URL used for downloading the file.
  * This is used to ensure that each file ends up in a different location, even
  * if the HASH and BASENAME are the same.
  *
  * The BASENAME portion is the basename of the URL. It's used for the filename
  * proper so that the local filesystem: URL will be downloaded to a file with a
  * recognizable name.
  *
  */

 goog.provide('goog.net.FileDownloader');
 goog.provide('goog.net.FileDownloader.Error');

 goog.require('goog.Disposable');
 goog.require('goog.asserts');
 goog.require('goog.async.Deferred');
 goog.require('goog.crypt.hash32');
 goog.require('goog.debug.Error');
 goog.require('goog.events');
 goog.require('goog.events.EventHandler');
 goog.require('goog.fs');
 goog.require('goog.fs.DirectoryEntry');
 goog.require('goog.fs.Error');
 goog.require('goog.fs.FileSaver');
 goog.require('goog.net.EventType');
 goog.require('goog.net.XhrIo');
 goog.require('goog.net.XhrIoPool');
 goog.require('goog.object');


 /**
  * Downloads remote files over XHR and persists them locally through the
  * HTML5 filesystem API.
  *
  * @param {!goog.fs.DirectoryEntry} dir The directory that holds the downloaded
  *     files. Nothing other than the FileDownloader should manage this
  *     directory.
  * @param {goog.net.XhrIoPool=} opt_pool The pool that supplies XhrIo objects
  *     for downloads. A new pool is created when this is omitted.
  * @constructor
  * @extends {goog.Disposable}
  * @final
  */
 goog.net.FileDownloader = function(dir, opt_pool) {
   goog.net.FileDownloader.base(this, 'constructor');

   /**
    * Root directory under which every downloaded file is stored.
    * @type {!goog.fs.DirectoryEntry}
    * @private
    */
   this.dir_ = dir;

   /**
    * Source of the XhrIo objects used to fetch files.
    * @type {!goog.net.XhrIoPool}
    * @private
    */
   this.pool_ = opt_pool ? opt_pool : new goog.net.XhrIoPool();

   /**
    * Downloads currently in progress, keyed by the URL string they were
    * requested with.
    * @type {!Object<!goog.net.FileDownloader.Download_>}
    * @private
    */
   this.downloads_ = {};

   /**
    * Owns the XHR and file-writer listeners registered by this object.
    * @type {!goog.events.EventHandler<!goog.net.FileDownloader>}
    * @private
    */
   this.eventHandler_ = new goog.events.EventHandler(this);
 };
 goog.inherits(goog.net.FileDownloader, goog.Disposable);


 /**
  * Fetches a remote file and stores its contents in the filesystem. Files are
  * identified purely by their URL string, so a relative and an absolute URL
  * that point at the same resource count as two different files.
  *
  * The returned Deferred resolves to the downloaded blob. If the download
  * fails, its errback receives a {@link goog.net.FileDownloader.Error}.
  *
  * When the URL is already being downloaded, the Deferred for that existing
  * download is reused rather than starting a second request. When the URL has
  * already been downloaded, the attempt fails at the point where the blob is
  * saved.
  *
  * Every Deferred handed out for one download is a branch of the same parent.
  * Cancelling all branches, or cancelling any one of them with
  * opt_deepCancel, cancels the download itself.
  *
  * @param {string} url The URL of the file to download.
  * @return {!goog.async.Deferred} The deferred result blob.
  */
 goog.net.FileDownloader.prototype.download = function(url) {
   var download;
   if (this.isDownloading(url)) {
     download = this.downloads_[url];
   } else {
     download = new goog.net.FileDownloader.Download_(url, this);
     this.downloads_[url] = download;
     // The request starts once the pool hands an XhrIo to gotXhr_.
     this.pool_.getObject(goog.bind(this.gotXhr_, this, download));
   }
   return download.deferred.branch(true /* opt_propagateCancel */);
 };


 /**
  * Returns a Deferred that fires as soon as no download is active for the
  * given URL. With no active download it fires immediately; otherwise it fires
  * when the active download finishes, regardless of whether that download
  * succeeded.
  *
  * @param {string} url The URL of the download to wait for.
  * @return {!goog.async.Deferred} The Deferred that will fire (with null) when
  *     the download is complete.
  */
 goog.net.FileDownloader.prototype.waitForDownload = function(url) {
   var done = new goog.async.Deferred();
   if (!this.isDownloading(url)) {
     done.callback(null);
     return done;
   }

   // Fires on both success and failure; the download's own result is ignored.
   var signalDone = function() {
     done.callback(null);
   };
   this.downloads_[url].deferred.addBoth(signalDone, this);
   return done;
 };


 /**
  * Returns whether or not there is an active download for a given URL.
  *
  * @param {string} url The URL of the download to check.
  * @return {boolean} Whether or not there is an active download for the URL.
  */
 goog.net.FileDownloader.prototype.isDownloading = function(url) {
   // downloads_ is a plain object, so a bare `in` test would also match keys
   // inherited from Object.prototype. A URL such as 'constructor' or
   // 'toString' would then be reported as downloading, and callers would
   // dereference a value that is not a Download_. Only own keys count.
   return Object.prototype.hasOwnProperty.call(this.downloads_, url);
 };


 /**
  * Loads a previously downloaded blob from the filesystem. The returned
  * Deferred fires an error if the URL has not been downloaded yet.
  *
  * @param {string} url The URL of the blob to load.
  * @return {!goog.async.Deferred} The deferred Blob object. The callback
  *     receives the blob; a file API error raised while loading it is passed
  *     to the errback.
  */
 goog.net.FileDownloader.prototype.getDownloadedBlob = function(url) {
   var readFile = function(fileEntry) {
     return fileEntry.file();
   };
   return this.getFile_(url).addCallback(readFile);
 };


 /**
  * Gets the local filesystem: URL of a downloaded file. This is not the same
  * as the blob: URL obtainable via getDownloadedBlob(): when the end user
  * opens the filesystem: URL, the file's name comes from the download filename
  * rather than an arbitrary GUID. The filesystem: URL is also tied to a
  * filesystem location, so it stops being valid once the download is removed.
  *
  * Warning: in Chrome 12, some filesystem: URLs are opened inline. This means
  * that e.g. HTML pages given to the user via filesystem: URLs will be opened
  * and processed by the browser.
  *
  * @param {string} url The URL of the file to get the URL of.
  * @return {!goog.async.Deferred} The deferred filesystem: URL. The callback
  *     receives the URL; a file API error raised while locating the file is
  *     passed to the errback.
  */
 goog.net.FileDownloader.prototype.getLocalUrl = function(url) {
   var entryToUrl = function(fileEntry) {
     return fileEntry.toUrl();
   };
   return this.getFile_(url).addCallback(entryToUrl);
 };


 /**
  * Reports (via a Deferred) whether a URL has been downloaded. The Deferred
  * fires an error if the check itself goes wrong.
  *
  * @param {string} url The URL to check.
  * @return {!goog.async.Deferred} The deferred boolean. The callback receives
  *     the boolean; a file API error other than "not found" raised during the
  *     check is passed to the errback.
  */
 goog.net.FileDownloader.prototype.isDownloaded = function(url) {
   var result = new goog.async.Deferred();
   var lookup = this.getDownloadedBlob(url);

   // Finding the blob means the URL has been downloaded.
   lookup.addCallback(function() {
     result.callback(true);
   });

   // NOT_FOUND is the expected "no" answer; anything else is a real failure.
   lookup.addErrback(function(err) {
     if (err.code != goog.fs.Error.ErrorCode.NOT_FOUND) {
       result.errback(err);
       return;
     }
     result.callback(false);
   });

   return result;
 };


 /**
  * Removes a URL from the FileDownloader by recursively deleting the directory
  * that holds its file.
  *
  * The returned Deferred's callback is called without any value once removal
  * succeeds. If removal fails, its errback is called with the
  * {@link goog.fs.Error}.
  *
  * @param {string} url The URL to remove.
  * @return {!goog.async.Deferred} The deferred used for registering callbacks on
  *     success or on error.
  */
 goog.net.FileDownloader.prototype.remove = function(url) {
   var removeTree = function(dir) {
     return dir.removeRecursively();
   };
   return this.getDir_(url, goog.fs.DirectoryEntry.Behavior.DEFAULT).
       addCallback(removeTree);
 };


 /**
  * Saves a blob for a given URL. This works just as though the blob had been
  * downloaded from that URL, except that the caller supplies the blob and no
  * HTTP request is made.
  *
  * If the URL is currently being downloaded, it is indeterminate whether the
  * supplied blob or the downloaded blob ends up in the filesystem. Whichever
  * one is not saved gets an error. To make one of them take precedence, use
  * {@link #waitForDownload} to let the download complete before setting the
  * blob.
  *
  * @param {string} url The URL at which to set the blob.
  * @param {!Blob} blob The blob to set.
  * @param {string=} opt_name The name of the file. If this isn't given, it's
  *     determined from the URL.
  * @return {!goog.async.Deferred} The deferred used for registering callbacks on
  *     success or on error. This can be cancelled just like a {@link #download}
  *     Deferred. The objects passed to the errback will be
  *     {@link goog.net.FileDownloader.Error}s.
  */
 goog.net.FileDownloader.prototype.setBlob = function(url, blob, opt_name) {
   var fileName = this.sanitize_(opt_name || this.urlToName_(url));

   // Register the blob as if it were a download that has already been fetched.
   var download = new goog.net.FileDownloader.Download_(url, this);
   download.blob = blob;
   this.downloads_[url] = download;

   var createFile = function(dir) {
     return dir.getFile(
         fileName, goog.fs.DirectoryEntry.Behavior.CREATE_EXCLUSIVE);
   };
   var onFile = goog.bind(this.fileSuccess_, this, download);
   var onError = goog.bind(this.error_, this, download);

   this.getDir_(download.url, goog.fs.DirectoryEntry.Behavior.CREATE_EXCLUSIVE).
       addCallback(createFile).
       addCallback(onFile).
       addErrback(onError);
   return download.deferred.branch(true /* opt_propagateCancel */);
 };


 /**
  * Invoked when the XHR pool hands over an XhrIo for a pending download.
  * Wires up the listeners for the request and sends it, or returns the XhrIo
  * to the pool straight away if the download was cancelled in the meantime.
  *
  * @param {!goog.net.FileDownloader.Download_} download The download object for
  *     this download.
  * @param {!goog.net.XhrIo} xhr The XhrIo object for downloading the page.
  * @private
  */
 goog.net.FileDownloader.prototype.gotXhr_ = function(download, xhr) {
   if (download.cancelled) {
     this.freeXhr_(xhr);
     return;
   }

   var onSuccess = goog.bind(this.xhrSuccess_, this, download);
   var onFailure = goog.bind(this.error_, this, download);
   // READY is what returns the XhrIo to the pool once the request is over.
   var onReady = goog.bind(this.freeXhr_, this, xhr);

   var handler = this.eventHandler_;
   handler.listen(xhr, goog.net.EventType.SUCCESS, onSuccess);
   handler.listen(
       xhr, [goog.net.EventType.ERROR, goog.net.EventType.ABORT], onFailure);
   handler.listen(xhr, goog.net.EventType.READY, onReady);

   download.xhr = xhr;
   xhr.setResponseType(goog.net.XhrIo.ResponseType.ARRAY_BUFFER);
   xhr.send(download.url);
 };


 /**
  * Invoked when an XHR has successfully fetched a remote file. Turns the
  * response into a blob and starts creating the file it will be written to.
  *
  * @param {!goog.net.FileDownloader.Download_} download The download object for
  *     this download.
  * @private
  */
 goog.net.FileDownloader.prototype.xhrSuccess_ = function(download) {
   if (download.cancelled) {
     return;
   }

   var rawName = this.getName_(
       /** @type {!goog.net.XhrIo} */ (download.xhr));
   var fileName = this.sanitize_(rawName);
   var body = /** @type {ArrayBuffer} */ (download.xhr.getResponse());
   if (body) {
     download.blob = goog.fs.getBlob(body);
     // The XHR is no longer needed; later errors are reported as file errors.
     delete download.xhr;

     var createFile = function(dir) {
       return dir.getFile(
           fileName, goog.fs.DirectoryEntry.Behavior.CREATE_EXCLUSIVE);
     };
     this.getDir_(
             download.url, goog.fs.DirectoryEntry.Behavior.CREATE_EXCLUSIVE).
         addCallback(createFile).
         addCallback(goog.bind(this.fileSuccess_, this, download)).
         addErrback(goog.bind(this.error_, this, download));
     return;
   }

   // This should never happen - it indicates the XHR hasn't completed, has
   // failed or has been cleaned up.  If it does happen (eg. due to a bug
   // somewhere) we don't want to pass null to getBlob - it's not valid and
   // triggers a bug in some versions of WebKit causing it to crash.
   this.error_(download);
 };


 /**
  * Invoked once the file that will receive the blob has been opened. Requests
  * a writer for it, or removes the file again if the download was cancelled.
  *
  * @param {!goog.net.FileDownloader.Download_} download The download object for
  *     this download.
  * @param {!goog.fs.FileEntry} file The newly-opened file object.
  * @private
  */
 goog.net.FileDownloader.prototype.fileSuccess_ = function(download, file) {
   if (!download.cancelled) {
     download.file = file;
     file.createWriter().
         addCallback(goog.bind(this.fileWriterSuccess_, this, download)).
         addErrback(goog.bind(this.error_, this, download));
     return;
   }

   // Cancelled while the file was being created: don't leave it behind.
   file.remove();
 };


 /**
  * Invoked once a file writer has been successfully created. Starts writing
  * the blob and arranges for writeEnd_ to run when the write finishes.
  *
  * @param {!goog.net.FileDownloader.Download_} download The download object for
  *     this download.
  * @param {!goog.fs.FileWriter} writer The newly-created file writer object.
  * @private
  */
 goog.net.FileDownloader.prototype.fileWriterSuccess_ = function(
     download, writer) {
   if (!download.cancelled) {
     download.writer = writer;
     writer.write(/** @type {!Blob} */ (download.blob));

     var onWriteEnd = goog.bind(this.writeEnd_, this, download);
     this.eventHandler_.listenOnce(
         writer, goog.fs.FileSaver.EventType.WRITE_END, onWriteEnd);
     return;
   }

   // Cancelled before anything was written: discard the empty file.
   download.file.remove();
 };


 /**
  * Invoked when writing the file has ended, successfully or not. On success
  * the download is unregistered and its Deferred receives the blob; otherwise
  * the failure is routed through error_.
  *
  * @param {!goog.net.FileDownloader.Download_} download The download object for
  *     this download.
  * @private
  */
 goog.net.FileDownloader.prototype.writeEnd_ = function(download) {
   var failed = download.cancelled || download.writer.getError();
   if (failed) {
     this.error_(download, download.writer.getError());
     return;
   }

   delete this.downloads_[download.url];
   download.deferred.callback(download.blob);
 };


 /**
  * Shared failure handler for every asynchronous stage of a download. Removes
  * any file created so far and, unless the download was cancelled,
  * unregisters the download and fires its errback with a
  * {@link goog.net.FileDownloader.Error}.
  *
  * @param {!goog.net.FileDownloader.Download_} download The download object for
  *     this download.
  * @param {goog.fs.Error=} opt_err The file error object. Only defined if the
  *     error was raised by the file API.
  * @private
  */
 goog.net.FileDownloader.prototype.error_ = function(download, opt_err) {
   var partialFile = download.file;
   if (partialFile) {
     partialFile.remove();
   }

   // A cancelled download is left alone here; nothing further is reported.
   if (!download.cancelled) {
     delete this.downloads_[download.url];
     var error = new goog.net.FileDownloader.Error(download, opt_err);
     download.deferred.errback(error);
   }
 };


 /**
  * Aborts a download: disposes the download object and then drops its entry
  * from the table of active downloads.
  *
  * @param {!goog.net.FileDownloader.Download_} download The download to abort.
  * @private
  */
 goog.net.FileDownloader.prototype.cancel_ = function(download) {
   var activeDownloads = this.downloads_;
   goog.dispose(download);
   delete activeDownloads[download.url];
 };


 /**
  * Gets the directory for a given URL. If the directory already exists when
  * this is called, it will contain exactly one file: the downloaded file.
  *
  * This does more than call the FileSystem API's getDirectory method: it also
  * spreads the per-URL directories across hash-named parent directories so
  * that no single directory grows too large. The spec says directories can't
  * contain more than 5000 files
  * (http://www.w3.org/TR/file-system-api/#directories), so each URL directory
  * is placed in a parent directory named after a 32-bit hash of the URL.
  *
  * @param {string} url The URL corresponding to the directory to get.
  * @param {goog.fs.DirectoryEntry.Behavior} behavior The behavior used when
  *     getting the per-URL directory. The hash-named parent directory is
  *     always requested with CREATE.
  * @return {!goog.async.Deferred} The deferred DirectoryEntry object.
  * @private
  */
 goog.net.FileDownloader.prototype.getDir_ = function(url, behavior) {
   // 3 hex digits provide 16**3 = 4096 different possible dirnames, which is
   // less than the maximum of 5000 entries. Downloaded files should be
   // distributed roughly evenly throughout the directories due to the hash
   // function, allowing many more than 5000 files to be downloaded.
   var hexHash = Math.abs(goog.crypt.hash32.encodeString(url)).toString(16);

   // The leading ` ensures that no illegal dirnames are accidentally used. % was
   // previously used, but Chrome has a bug (as of 12.0.725.0 dev) where
   // filenames are URL-decoded before checking their validity, so filenames
   // containing e.g. '%3f' (the URL-encoding of ?, an invalid character) are
   // rejected.
   var bucketName = '`' + hexHash.substring(0, 3);

   var openUrlDir = function(bucket) {
     return bucket.getDirectory(this.sanitize_(url), behavior);
   };
   return this.dir_.
       getDirectory(bucketName, goog.fs.DirectoryEntry.Behavior.CREATE).
       addCallback(openUrlDir, this);
 };


 /**
  * Gets the file for a given URL. This only retrieves files that have already
  * been saved and must not be used to create the file in the first place,
  * because the filename is not necessarily derived from the URL: it may come
  * from the headers of the XHR response. The file is therefore found by
  * listing the URL's directory rather than by name.
  *
  * @param {string} url The URL corresponding to the file to get.
  * @return {!goog.async.Deferred} The deferred FileEntry object.
  * @private
  */
 goog.net.FileDownloader.prototype.getFile_ = function(url) {
   var pickOnlyFile = function(dir) {
     return dir.listDirectory().addCallback(function(entries) {
       goog.asserts.assert(entries.length == 1);
       if (entries[0]) {
         return entries[0];
       }
       // If the filesystem somehow gets corrupted and we end up with an
       // empty directory here, it makes sense to just return the normal
       // file-not-found error.
       return dir.getFile('file');
     });
   };
   return this.getDir_(url, goog.fs.DirectoryEntry.Behavior.DEFAULT).
       addCallback(pickOnlyFile);
 };


 /**
  * Sanitize a string so it can be safely used as a file or directory name for
  * the FileSystem API.
  *
  * Each of the characters / \ < > : ? * " | % ` is replaced by a backtick
  * followed by its two-digit uppercase hexadecimal character code, and the
  * whole result is prefixed with a backtick.
  *
  * @param {string} str The string to sanitize.
  * @return {string} The sanitized string.
  * @private
  */
 goog.net.FileDownloader.prototype.sanitize_ = function(str) {
   // Add a prefix, since certain prefixes are disallowed for paths. None of the
   // disallowed prefixes start with '`'. We use ` rather than % for escaping the
   // filename due to a Chrome bug (as of 12.0.725.0 dev) where filenames are
   // URL-decoded before checking their validity, so filenames containing e.g.
   // '%3f' (the URL-encoding of ?, an invalid character) are rejected.
   //
   // The escape is built by hand rather than with encodeURIComponent because
   // encodeURIComponent leaves '*' untouched, which let that character through
   // unescaped. For every other character in the class the output is the same
   // as before. NOTE(review): entries previously stored under a name
   // containing a raw '*' will no longer be found under the new name.
   return '`' + str.replace(/[\/\\<>:?*"|%`]/g, function(ch) {
     // Every matched character is ASCII between 0x22 and 0x7C, so its code is
     // always exactly two hex digits.
     return '`' + ch.charCodeAt(0).toString(16).toUpperCase();
   });
 };


 /**
  * Determines the filename for an XHR response. A quoted filename in an
  * "attachment" Content-Disposition header wins; when there is none, the
  * basename of the URL the XHR last requested is used instead.
  *
  * @param {!goog.net.XhrIo} xhr The XHR containing the response headers.
  * @return {string} The filename.
  * @private
  */
 goog.net.FileDownloader.prototype.getName_ = function(xhr) {
   var header = xhr.getResponseHeader('Content-Disposition');
   if (header) {
     var parts = header.match(/^attachment *; *filename="(.*)"$/i);
     if (parts) {
       // The Content-Disposition header allows for arbitrary backslash-escaped
       // characters (usually " and \). We want to unescape them before using
       // them in the filename.
       return parts[1].replace(/\\(.)/g, '$1');
     }
   }

   return this.urlToName_(xhr.getLastUri());
 };


 /**
  * Extracts the basename from a URL: everything after the last '/', or the
  * whole string when it contains no '/'.
  *
  * @param {string} url The URL.
  * @return {string} The basename.
  * @private
  */
 goog.net.FileDownloader.prototype.urlToName_ = function(url) {
   var lastSlash = url.lastIndexOf('/');
   return url.substring(lastSlash + 1);
 };


 /**
  * Remove all event listeners for an XHR and release it back into the pool.
  *
  * Called directly by gotXhr_ when a download was cancelled before its XHR
  * arrived, and as the READY listener that gotXhr_ installs. In the listener
  * case the event object arrives as an extra argument and is ignored.
  *
  * @param {!goog.net.XhrIo} xhr The XHR to free.
  * @private
  */
 goog.net.FileDownloader.prototype.freeXhr_ = function(xhr) {
   // Listeners are dropped before the XHR is handed back.
   // NOTE(review): presumably so the pool's next user starts with a clean
   // XhrIo - confirm against the pool implementation.
   goog.events.removeAll(xhr);
   this.pool_.addFreeObject(xhr);
 };


 /**
  * Cancels every active download and releases the event handler, XHR pool and
  * directory reference held by this object.
  * @override
  */
 goog.net.FileDownloader.prototype.disposeInternal = function() {
   // Cancel outstanding downloads first, while the event handler and the pool
   // are still alive. Cancelling aborts any in-flight XHR, and the READY
   // listener installed by gotXhr_ is the only thing that hands that XhrIo
   // back to the pool via freeXhr_. Disposing the event handler before
   // cancelling would drop that listener, leaving the XhrIo checked out when
   // the pool is disposed below.
   goog.object.forEach(this.downloads_, function(download) {
     download.deferred.cancel();
   }, this);
   delete this.downloads_;

   goog.dispose(this.eventHandler_);
   delete this.eventHandler_;
   goog.dispose(this.pool_);
   delete this.pool_;
   delete this.dir_;

   goog.net.FileDownloader.base(this, 'disposeInternal');
 };


 /**
  * The error reported when a FileDownloader download fails. Its message
  * names the URL and, when available, the XHR status or the file API error
  * behind the failure.
  *
  * @param {!goog.net.FileDownloader.Download_} download The download object for
  *     the download in question.
  * @param {goog.fs.Error=} opt_fsErr The file error object, if this was a file
  *     error.
  *
  * @constructor
  * @extends {goog.debug.Error}
  * @final
  */
 goog.net.FileDownloader.Error = function(download, opt_fsErr) {
   goog.net.FileDownloader.Error.base(
       this, 'constructor', 'Error capturing URL ' + download.url);

   /**
    * The URL whose download failed.
    * @type {string}
    */
   this.url = download.url;

   var xhr = download.xhr;
   if (xhr) {
     // The download still holds its XHR, so the failure is reported as an
     // XHR failure.
     this.xhrStatus = xhr.getStatus();
     this.xhrErrorCode = xhr.getLastErrorCode();
     this.message += ': XHR failed with status ' + this.xhrStatus +
         ' (error code ' + this.xhrErrorCode + ')';
   } else if (opt_fsErr) {
     this.fileError = opt_fsErr;
     this.message += ': file API failed (' + opt_fsErr.message + ')';
   }
 };
 goog.inherits(goog.net.FileDownloader.Error, goog.debug.Error);


 /**
  * The HTTP status of the XHR. Only set when the error came from an XHR
  * failure.
  * @type {number|undefined}
  */
 goog.net.FileDownloader.Error.prototype.xhrStatus;


 /**
  * The error code of the XHR. Only set when the error came from an XHR
  * failure.
  * @type {goog.net.ErrorCode|undefined}
  */
 goog.net.FileDownloader.Error.prototype.xhrErrorCode;


 /**
  * The file API error. Only set when the error came from the file API.
  * @type {goog.fs.Error|undefined}
  */
 goog.net.FileDownloader.Error.prototype.fileError;


 /**
  * A struct holding the state of a single download as it moves through the
  * XHR, file-creation and file-writing stages.
  *
  * @param {string} url The URL for the file being downloaded.
  * @param {!goog.net.FileDownloader} downloader The parent FileDownloader.
  * @extends {goog.Disposable}
  * @constructor
  * @private
  */
 goog.net.FileDownloader.Download_ = function(url, downloader) {
   goog.net.FileDownloader.Download_.base(this, 'constructor');

   /**
    * The URL for the file being downloaded.
    * @type {string}
    */
   this.url = url;

   // Cancelling the Deferred asks the parent downloader to abort this download.
   var onCancel = goog.bind(downloader.cancel_, downloader, this);

   /**
    * The Deferred that is fired when the download completes.
    * @type {!goog.async.Deferred}
    */
   this.deferred = new goog.async.Deferred(onCancel);

   /**
    * Whether this download has been cancelled by the user.
    * @type {boolean}
    */
   this.cancelled = false;

   /**
    * The XhrIo object fetching the file. Only set once it has been obtained
    * from the pool.
    * @type {goog.net.XhrIo}
    */
   this.xhr = null;

   /**
    * The name of the blob being downloaded. Only set once the XHR has
    * completed, if it completed successfully.
    * @type {?string}
    */
   this.name = null;

   /**
    * The downloaded blob. Only set once the XHR has completed, if it completed
    * successfully.
    * @type {Blob}
    */
   this.blob = null;

   /**
    * The file entry in which the blob is stored. Only set once it has been
    * loaded from the filesystem.
    * @type {goog.fs.FileEntry}
    */
   this.file = null;

   /**
    * The file writer used to write the blob to the filesystem. Only set once
    * it has been loaded from the filesystem.
    * @type {goog.fs.FileWriter}
    */
   this.writer = null;
 };
 goog.inherits(goog.net.FileDownloader.Download_, goog.Disposable);


 /**
  * Marks the download as cancelled and aborts whichever stage is running: the
  * XHR if one is attached, otherwise the file writer if it is mid-write.
  * @override
  */
 goog.net.FileDownloader.Download_.prototype.disposeInternal = function() {
   this.cancelled = true;

   var xhr = this.xhr;
   var writer = this.writer;
   if (xhr) {
     xhr.abort();
   } else {
     var isWriting = writer &&
         writer.getReadyState() == goog.fs.FileSaver.ReadyState.WRITING;
     if (isWriting) {
       writer.abort();
     }
   }

   goog.net.FileDownloader.Download_.base(this, 'disposeInternal');
 };