| /* |
| * Copyright 2010 Google Inc. |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| // Author: sligocki@google.com (Shawn Ligocki) |
| |
| #ifndef NET_INSTAWEB_REWRITER_PUBLIC_REWRITE_DRIVER_FACTORY_H_ |
| #define NET_INSTAWEB_REWRITER_PUBLIC_REWRITE_DRIVER_FACTORY_H_ |
| |
| #include <set> |
| #include <vector> |
| |
| #include "net/instaweb/rewriter/public/central_controller_interface.h" |
| #include "net/instaweb/rewriter/public/central_controller_interface_adapter.h" |
| #include "pagespeed/kernel/base/abstract_mutex.h" |
| #include "pagespeed/kernel/base/basictypes.h" |
| #include "pagespeed/kernel/base/function.h" |
| #include "pagespeed/kernel/base/null_statistics.h" |
| #include "pagespeed/kernel/base/scoped_ptr.h" |
| #include "pagespeed/kernel/base/statistics.h" |
| #include "pagespeed/kernel/base/string.h" |
| #include "pagespeed/kernel/base/string_util.h" |
| #include "pagespeed/kernel/base/thread_system.h" |
| #include "pagespeed/kernel/thread/queued_worker_pool.h" |
| |
| namespace pagespeed { namespace js { struct JsTokenizerPatterns; } } |
| |
| namespace net_instaweb { |
| |
| class CacheHtmlInfoFinder; |
| class CriticalCssFinder; |
| class CriticalImagesFinder; |
| class CriticalLineInfoFinder; |
| class CriticalSelectorFinder; |
| class FileSystem; |
| class FlushEarlyInfoFinder; |
| class ExperimentMatcher; |
| class Hasher; |
| class MessageHandler; |
| class MobilizeCachedFinder; |
| class NamedLockManager; |
| class NonceGenerator; |
| class ProcessContext; |
| class PropertyCache; |
| class ServerContext; |
| class RewriteDriver; |
| class RewriteOptions; |
| class RewriteOptionsManager; |
| class RewriteStats; |
| class SHA1Signature; |
| class Scheduler; |
| class StaticAssetManager; |
| class Timer; |
| class UrlAsyncFetcher; |
| class UrlNamer; |
| class UsageDataReporter; |
| class UserAgentMatcher; |
| class UserAgentNormalizer; |
| |
| // Manages the construction and ownership of most objects needed to create |
| // RewriteDrivers. If you have your own versions of these classes (specific |
| // implementations of UrlAsyncFetcher, Hasher, etc.) you can make your own |
| // subclass of RewriteDriverFactory to use these by default. |
| class RewriteDriverFactory { |
| public: |
| // Helper for users of defer_cleanup; see below. |
| template<class T> class Deleter; |
| |
| enum WorkerPoolCategory { |
| kHtmlWorkers, |
| kRewriteWorkers, |
| kLowPriorityRewriteWorkers, |
| // Make sure to insert new values above this line. |
| kNumWorkerPools |
| }; |
| |
| // Takes ownership of thread_system. |
| RewriteDriverFactory(const ProcessContext& process_context, |
| ThreadSystem* thread_system); |
| |
| // Initializes default options we want to hard-code into the |
| // base-class to get consistency across deployments. Subclasses |
| // that override NewRewriteOptions() should call this method from |
| // their constructor. It is safe to call this multiple times. |
| void InitializeDefaultOptions(); |
| |
| // Static version of the above. If you are using that you may also need |
| // to call reset_default_options. |
| static void InitializeDefaultOptions(RewriteOptions* options); |
| |
| virtual ~RewriteDriverFactory(); |
| |
| // The RewriteDriverFactory will create objects of default type through the |
| // New* method from derived classes. Here are the objects that can be |
| // replaced before creating the RewriteDriver. |
| // Note: RewriteDriverFactory takes ownership of these. |
| void set_html_parse_message_handler(MessageHandler* message_handler); |
| void set_message_handler(MessageHandler* message_handler); |
| void set_file_system(FileSystem* file_system); |
| void set_hasher(Hasher* hasher); |
| void set_nonce_generator(NonceGenerator* nonce_generator); |
| void set_url_namer(UrlNamer* url_namer); |
| void set_signature(SHA1Signature* signature); |
| void set_timer(Timer* timer); |
| void set_usage_data_reporter(UsageDataReporter* reporter); |
| |
| // Set up a directory for slurped files for HTML and resources. If |
| // read_only is true, then it will only read from these files, and |
| // this will eliminate the usage of any other url_fetcher. If |
| // read_only is false, then the existing url fetcher will be used as |
| // a fallback if the slurped file is not found, and slurped files will |
| // be subsequently written so they don't have to be fetched from |
| // the Internet again. |
| // |
| // You must set the slurp directory prior to calling ComputeUrlAsyncFetcher. |
| void set_slurp_directory(const StringPiece& directory); |
| void set_slurp_read_only(bool read_only); |
| void set_slurp_print_urls(bool read_only); |
| |
| // Setting HTTP caching on causes both the fetcher and the async |
| // fetcher to return cached versions. |
| void set_force_caching(bool u) { force_caching_ = u; } |
| |
| // You can call set_base_url_async_fetcher to set up real async fetching |
| // for real serving or for modeling of live traffic. |
| // |
| // These fetchers may be used directly when serving traffic, or they |
| // may be aggregated with other fetchers (e.g. for slurping). |
| // |
| // You cannot set the base URL fetcher once ComputeUrlAsyncFetcher has |
| // been called. |
| void set_base_url_async_fetcher(UrlAsyncFetcher* url_fetcher); |
| // Takes ownership of distributed_fetcher. |
| void set_base_distributed_async_fetcher(UrlAsyncFetcher* distributed_fetcher); |
| bool set_filename_prefix(StringPiece p); |
| |
| // Determines whether Slurping is enabled. |
| bool slurping_enabled() const { return !slurp_directory_.empty(); } |
| |
| MessageHandler* html_parse_message_handler(); |
| MessageHandler* message_handler(); |
| FileSystem* file_system(); |
| NonceGenerator* nonce_generator(); |
| // TODO(sligocki): Remove hasher() and force people to make a NewHasher when |
| // they need one. |
| Hasher* hasher(); |
| UrlNamer* url_namer(); |
| UserAgentMatcher* user_agent_matcher(); |
| StaticAssetManager* static_asset_manager(); |
| SHA1Signature* signature(); |
| RewriteOptions* default_options() { return default_options_.get(); } |
| virtual RewriteOptionsManager* NewRewriteOptionsManager(); |
| |
| // These accessors are *not* thread-safe until after the first call, as they |
| // do unlocked lazy initialization, so they must be called at least once prior |
| // to starting threads. Normally this is done by CreateServerContext() or |
| // InitServerContext(). |
| Timer* timer(); |
| NamedLockManager* lock_manager(); |
| QueuedWorkerPool* WorkerPool(WorkerPoolCategory pool); |
| Scheduler* scheduler(); |
| UsageDataReporter* usage_data_reporter(); |
| const pagespeed::js::JsTokenizerPatterns* js_tokenizer_patterns() const { |
| return js_tokenizer_patterns_; |
| } |
| const std::vector<const UserAgentNormalizer*>& user_agent_normalizers(); |
| |
| // Computes URL fetchers using the base fetcher, and optionally, |
| // slurp_directory and slurp_read_only. These are not thread-safe; |
| // they must be called once prior to spawning threads, e.g. via |
| // CreateServerContext. |
| virtual UrlAsyncFetcher* ComputeUrlAsyncFetcher(); |
| virtual UrlAsyncFetcher* ComputeDistributedFetcher(); |
| |
| // Threadsafe mechanism to create a managed ServerContext. The |
| // ServerContext is owned by the factory, and should not be |
| // deleted directly. Currently it is not possible to delete a |
| // server context except by deleting the entire factory. |
| // |
| // Implemented in terms of NewServerContext(). |
| // |
| // Note that this is a convenience wrapper only. In particular, |
| // SystemServerContext creates ServerContexts by calling New and Init |
| // separately. |
| ServerContext* CreateServerContext(); |
| |
| // Initializes a ServerContext that has been new'd directly. This |
| // allows 2-phase initialization if required. There is no need to |
| // call this if you use CreateServerContext. |
| void InitServerContext(ServerContext* server_context); |
| |
| // Called from InitServerContext, but virtualized separately as it is |
| // platform-specific. This method must call on the server context: |
| // set_http_cache, set_metadata_cache, set_filesystem_metadata_cache, and |
| // MakePropertyCaches. |
| virtual void SetupCaches(ServerContext* server_context) = 0; |
| |
| // Returns true if this platform uses beacon-based measurements to make |
| // run-time decisions. This is used to determine how to configure various |
| // beacon-based filters. |
| virtual bool UseBeaconResultsInFilters() const = 0; |
| |
| // Provides an optional hook for adding rewrite passes to the HTML filter |
| // chain. This should be used for filters that are specific to a particular |
| // RewriteDriverFactory implementation. |
| virtual void AddPlatformSpecificRewritePasses(RewriteDriver* driver); |
| |
| // Provides an optional hook for adding rewriters to the .pagespeed. resource |
| // decoding chain. This should be used for rewriters that are specific to a |
| // particular RewriteDriverFactory implementation. The caller should only use |
| // the resulting driver for reconstructing a .pagespeed. resource, not for |
| // transforming HTML. Therefore, implementations should add any |
| // platform-specific rewriter whose id might appear in a .pagespeed. URL. |
| // This should be done independent of RewriteOptions, since we only store |
| // a single decoding driver globally to save memory. |
| virtual void AddPlatformSpecificDecodingPasses(RewriteDriver* driver); |
| |
| // Provides an optional hook for customizing the RewriteDriver object |
| // using the options set on it. This is called before |
| // RewriteDriver::AddFilters() and AddPlatformSpecificRewritePasses(). |
| virtual void ApplyPlatformSpecificConfiguration(RewriteDriver* driver); |
| |
| ThreadSystem* thread_system() { return thread_system_.get(); } |
| |
| // Return interface to various functions that workers need delegated |
| // to a central service. Depending on the implementation, this may invoke |
| // RPCs. |
| CentralControllerInterfaceAdapter* central_controller_interface() { |
| return central_controller_interface_.get(); |
| } |
| |
| // Returns the set of directories that we (or our subclasses) have created |
| // thus far. |
| const StringSet& created_directories() const { |
| return created_directories_; |
| } |
| |
| bool async_rewrites() { return true; } |
| |
| // Collection of global statistics objects. This is thread-unsafe: |
| // it must be called prior to spawning threads, and after any calls |
| // to SetStatistics. Failing that, it will be initialized in the |
| // first call to InitServerContext(), which is thread-safe. |
| RewriteStats* rewrite_stats(); |
| |
| // Returns the Statistics in use (default is NullStatistics). Override it by |
| // calling SetStatistics, either from subclasses or externally. |
| Statistics* statistics() { return statistics_; } |
| |
| // Initializes statistics variables. This must be done at process |
| // startup to enable shared memory segments in Apache to be set up. |
| static void InitStats(Statistics* statistics); |
| |
| // Initializes static variables. Initialize/Terminate calls must be paired. |
| static void Initialize(); |
| static void Terminate(); |
| |
| // Does *not* take ownership of Statistics. |
| void SetStatistics(Statistics* stats); |
| |
| // Clean up all the factory-owned resources: fetchers, pools, |
| // Server Contexts, the Drivers owned by the Server Contexts, |
| // and worker threads. |
| virtual void ShutDown(); |
| |
| // Registers the directory as having been created by us. |
| void AddCreatedDirectory(const GoogleString& dir); |
| |
| // Creates a new empty RewriteOptions object, with no default settings. |
| // Generally configurations go factory's default_options() -> |
| // ServerContext::global_options() -> RewriteDriverFactory, |
| // but this method just provides a blank set of options. |
| virtual RewriteOptions* NewRewriteOptions(); |
| |
| // Creates a new empty RewriteOptions object meant for use for |
| // custom options from queries or headers. Default implementation just |
| // forwards to NewRewriteOptions(). |
| virtual RewriteOptions* NewRewriteOptionsForQuery(); |
| |
| // get/set the version placed into the X-[Mod-]Page(s|-S)peed header. |
| const GoogleString& version_string() const { return version_string_; } |
| void set_version_string(const StringPiece& version_string) { |
| version_string.CopyToString(&version_string_); |
| } |
| |
| // Causes the given function to be Run after all the threads are shutdown, |
| // in order to do any needed resource cleanups. The Deleter<T> template below |
| // may be useful for object deletion cleanups. |
| void defer_cleanup(Function* f) { deferred_cleanups_.push_back(f); } |
| |
| // Queues an object for deletion at the last phase of RewriteDriverFactory |
| // destruction. |
| template<class T> void TakeOwnership(T* obj) { |
| defer_cleanup(new RewriteDriverFactory::Deleter<T>(obj)); |
| } |
| |
| // Base method that returns true if the given ip is a debug ip. |
| virtual bool IsDebugClient(const GoogleString& ip) const { |
| return false; |
| } |
| |
| // Creates an ExperimentMatcher, which is used to match clients or sessions to |
| // a specific experiment. |
| virtual ExperimentMatcher* NewExperimentMatcher(); |
| |
| // Control the number of simultaneous expensive CPU operations going on at |
| // once. Invokes Run on your callback at a time when it is OK to do the |
| // expensive operation, or Cancel if you should not perform the operation. |
| // Depending on the implementation, may queue the callback for theoretically |
| // unbounded time. |
| void ScheduleExpensiveOperation(ExpensiveOperationCallback* callback); |
| |
| protected: |
| bool FetchersComputed() const; |
| virtual void StopCacheActivity(); |
| StringPiece filename_prefix(); |
| |
| // Used by subclasses to indicate that a ServerContext has been |
| // terminated. Returns true if this was the last server context |
| // known to this factory. |
| bool TerminateServerContext(ServerContext* server_context); |
| |
| // Implementors of RewriteDriverFactory must supply default definitions |
| // for each of these methods, although they may be overridden via set_ |
| // methods above. These methods all instantiate objects and transfer |
| // ownership to the caller. |
| virtual UrlAsyncFetcher* DefaultAsyncUrlFetcher() = 0; |
| virtual MessageHandler* DefaultHtmlParseMessageHandler() = 0; |
| virtual MessageHandler* DefaultMessageHandler() = 0; |
| virtual FileSystem* DefaultFileSystem() = 0; |
| virtual NonceGenerator* DefaultNonceGenerator(); |
| virtual Timer* DefaultTimer(); |
| virtual SHA1Signature* DefaultSignature(); |
| |
| virtual Hasher* NewHasher() = 0; |
| |
| // Creates a new ServerContext* object. ServerContext itself must be |
| // overridden per Factory as it has at least one pure virtual method. |
| virtual ServerContext* NewServerContext() = 0; |
| |
| // Create a new ServerContext used for decoding only. Unlike NewServerContext, |
| // the resulting ServerContext should not be fresh, but should have some of |
| // its platform dependencies injected --- but just enough for decoding URLs, |
| // and not full operation. At the time of writing it needs the timer, |
| // url namer, hasher, message handler, and stats; expensive stuff like |
| // cache backends is not needed, however. |
| // |
| // You may find InitStubDecodingServerContext() useful for doing that, as it |
| // will inject all of these from what's available in 'this'. |
| virtual ServerContext* NewDecodingServerContext() = 0; |
| |
| virtual UrlAsyncFetcher* DefaultDistributedUrlFetcher() { return NULL; } |
| |
| virtual CriticalCssFinder* DefaultCriticalCssFinder(); |
| virtual CriticalImagesFinder* DefaultCriticalImagesFinder( |
| ServerContext* server_context); |
| virtual CriticalSelectorFinder* DefaultCriticalSelectorFinder( |
| ServerContext* server_context); |
| |
| // Note: this one may return NULL. |
| virtual MobilizeCachedFinder* DefaultMobilizeCachedFinder( |
| ServerContext* server_context); |
| |
| // Default implementation returns NULL. |
| virtual CacheHtmlInfoFinder* DefaultCacheHtmlInfoFinder( |
| PropertyCache* cache, ServerContext* server_context); |
| |
| // Default implementation returns NULL. |
| virtual FlushEarlyInfoFinder* DefaultFlushEarlyInfoFinder(); |
| |
| // Default implementation returns a valid CriticalLineInfoFinder. |
| virtual CriticalLineInfoFinder* DefaultCriticalLineInfoFinder( |
| ServerContext* server_context); |
| |
| // They may also supply a custom lock manager. The default implementation |
| // will use the file system. |
| virtual NamedLockManager* DefaultLockManager(); |
| |
| // They may also supply a custom Url namer. The default implementation |
| // performs sharding and appends '.pagespeed.<filter>.<hash>.<extension>'. |
| virtual UrlNamer* DefaultUrlNamer(); |
| |
| virtual UserAgentMatcher* DefaultUserAgentMatcher(); |
| virtual UsageDataReporter* DefaultUsageDataReporter(); |
| |
| // Provides an optional hook to add user-agent normalizers specific to |
| // needs of a specific RewriteDriverFactory implementation. The new entries |
| // should be appended to the end of *out (without clearing it), and should |
| // still be owned by the RewriteDriverFactory subclass. |
| // |
| // Default implementation does nothing. |
| virtual void AddPlatformSpecificUserAgentNormalizers( |
| std::vector<const UserAgentNormalizer*>* out); |
| |
| // Subclasses can override this to create an appropriately-sized thread |
| // pool for their environment. The default implementation will always |
| // make one with a single thread. |
| virtual QueuedWorkerPool* CreateWorkerPool(WorkerPoolCategory pool, |
| StringPiece name); |
| |
| // Subclasses can override this method to request load-shedding to happen |
| // if the low-priority work pool has too many inactive sequences queued up |
| // waiting (the returned value will be a threshold beyond which things |
| // will start getting dropped). The default implementation returns |
| // kNoLoadShedding, which disables the feature. See also |
| // QueuedWorkerPool::set_load_shedding_threshold |
| virtual int LowPriorityLoadSheddingThreshold() const; |
| |
| // Subclasses can override this to create an appropriate Scheduler |
| // subclass if the default isn't acceptable. |
| virtual Scheduler* CreateScheduler(); |
| |
| // Called before creating the url fetchers. |
| virtual void FetcherSetupHooks(); |
| |
| // Override this if you want to change what directory locks go into |
| // when using the default filesystem-based lock manager. The default is |
| // filename_prefix() |
| virtual StringPiece LockFilePrefix(); |
| |
| // Initializes the StaticAssetManager. |
| virtual void InitStaticAssetManager( |
| StaticAssetManager* static_asset_manager) {} |
| |
| // Sets up enough of platform dependencies in 'context' to be able to use |
| // it for decoding URLs, based on this object's values and some stubs. |
| void InitStubDecodingServerContext(ServerContext* context); |
| |
| // Allow sub-classes to pick which CentralController they want to use. |
| virtual CentralControllerInterface* CreateCentralController(); |
| |
| // For use in tests. |
| void RebuildDecodingDriverForTests(ServerContext* server_context); |
| |
| void reset_default_options(RewriteOptions* new_defaults); |
| |
| private: |
| // Creates a StaticAssetManager instance. Default implementation creates an |
| // instance that disables serving of filter javascript via gstatic |
| // (gstatic.com is the domain google uses for serving static content). |
| StaticAssetManager* DefaultStaticAssetManager(); |
| |
| void SetupSlurpDirectories(); |
| |
| void InitDecodingDriver(ServerContext* server_context); |
| |
| // This should only be called during startup. Takes ownership of interface. |
| void set_central_controller_interface(CentralControllerInterface* interface); |
| |
| scoped_ptr<MessageHandler> html_parse_message_handler_; |
| scoped_ptr<MessageHandler> message_handler_; |
| scoped_ptr<FileSystem> file_system_; |
| UrlAsyncFetcher* url_async_fetcher_; |
| UrlAsyncFetcher* distributed_async_fetcher_; |
| scoped_ptr<UrlAsyncFetcher> base_url_async_fetcher_; |
| scoped_ptr<UrlAsyncFetcher> base_distributed_async_fetcher_; |
| scoped_ptr<Hasher> hasher_; |
| scoped_ptr<NonceGenerator> nonce_generator_; |
| scoped_ptr<SHA1Signature> signature_; |
| scoped_ptr<UrlNamer> url_namer_; |
| scoped_ptr<UserAgentMatcher> user_agent_matcher_; |
| |
| // Lazily filled-in list of UA normalizers, including the default ones |
| // this class adds, and any additional ones added by user_agent_normalizers() |
| // calling subclass' AddPlatformSpecificUserAgentNormalizers on this. |
| std::vector<const UserAgentNormalizer*> user_agent_normalizers_; |
| scoped_ptr<StaticAssetManager> static_asset_manager_; |
| scoped_ptr<Timer> timer_; |
| scoped_ptr<Scheduler> scheduler_; |
| scoped_ptr<UsageDataReporter> usage_data_reporter_; |
| // RE2 patterns needed for JsTokenizer. |
| const pagespeed::js::JsTokenizerPatterns* js_tokenizer_patterns_; |
| |
| GoogleString filename_prefix_; |
| GoogleString slurp_directory_; |
| bool force_caching_; |
| bool slurp_read_only_; |
| bool slurp_print_urls_; |
| |
| scoped_ptr<ThreadSystem> thread_system_; |
| |
| // protected by server_context_mutex_; |
| typedef std::set<ServerContext*> ServerContextSet; |
| ServerContextSet server_contexts_; |
| scoped_ptr<AbstractMutex> server_context_mutex_; |
| |
| // Stores options with hard-coded defaults and adjustments from |
| // the core system, subclasses, and command-line. |
| scoped_ptr<RewriteOptions> default_options_; |
| |
| // Keep around a RewriteDriver just for decoding resource URLs, using |
| // the default options. This is possible because the id->RewriteFilter |
| // table is fully constructed independent of the options; we however |
| // still inject options into some of the Decode methods since we also |
| // need to honor things like forbids. We also have a special |
| // ServerContext just for it, to avoid connecting it to any particular |
| // pre-existing one. |
| scoped_ptr<ServerContext> decoding_server_context_; |
| scoped_ptr<RewriteDriver> decoding_driver_; |
| |
| // Manage locks for output resources. |
| scoped_ptr<NamedLockManager> lock_manager_; |
| |
| scoped_ptr<CentralControllerInterfaceAdapter> central_controller_interface_; |
| |
| // Default statistics implementation which can be overridden by children |
| // by calling SetStatistics(). |
| NullStatistics null_statistics_; |
| Statistics* statistics_; |
| |
| StringSet created_directories_; |
| |
| std::vector<QueuedWorkerPool*> worker_pools_; |
| |
| // These must be initialized after the RewriteDriverFactory subclass has been |
| // constructed so it can use the statistics() override. |
| scoped_ptr<RewriteStats> rewrite_stats_; |
| |
| // To assist with subclass destruction-order, subclasses can register |
| // functions to run late in the destructor. |
| std::vector<Function*> deferred_cleanups_; |
| |
| // Version string to put into HTTP response headers. |
| // TODO(sligocki): Remove. Redundant with RewriteOptions::x_header_value(). |
| GoogleString version_string_; |
| |
| // The hostname we're running on. Used to set the same field in ServerContext. |
| GoogleString hostname_; |
| |
| DISALLOW_COPY_AND_ASSIGN(RewriteDriverFactory); |
| }; |
| |
| // Helper for users of RewriteDriverFactory::defer_cleanup --- instantiates |
| // into objects that call the appropriate delete operator when Run. |
| template <typename T> |
| class RewriteDriverFactory::Deleter : public Function { |
| public: |
| // Remembers 'target' so that it can be deleted when Run() is invoked. |
| explicit Deleter(T* target) : target_(target) {} |
| |
| // Deletes the pointer that was handed to the constructor. |
| virtual void Run() { |
| delete target_; |
| } |
| |
| private: |
| // Pointer passed to 'delete' by Run(). |
| T* target_; |
| |
| DISALLOW_COPY_AND_ASSIGN(Deleter); |
| }; |
| |
| } // namespace net_instaweb |
| |
| #endif // NET_INSTAWEB_REWRITER_PUBLIC_REWRITE_DRIVER_FACTORY_H_ |