| <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> |
| <html> |
| <head> |
| <META http-equiv="Content-Type" content="text/html; charset=UTF-8"> |
| <meta content="Apache Forrest" name="Generator"> |
| <meta name="Forrest-version" content="0.9-dev"> |
| <meta name="Forrest-skin-name" content="pelt"> |
| <title>Lucene</title> |
| <link type="text/css" href="../../../../skin/basic.css" rel="stylesheet"> |
| <link media="screen" type="text/css" href="../../../../skin/screen.css" rel="stylesheet"> |
| <link media="print" type="text/css" href="../../../../skin/print.css" rel="stylesheet"> |
| <link type="text/css" href="../../../../skin/profile.css" rel="stylesheet"> |
| <script src="../../../../skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="../../../../skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="../../../../skin/fontsize.js" language="javascript" type="text/javascript"></script> |
| <link rel="shortcut icon" href="../../../../favicon.ico"> |
| </head> |
| <body onload="init()"> |
| <script type="text/javascript">ndeSetTextSize();</script> |
| <div id="top"> |
| <!--+ |
| |breadtrail |
| +--> |
| <div class="breadtrail"> |
| <a href="http://www.apache.org/">apache</a> > <a href="http://lenya.apache.org/">lenya</a><script src="../../../../skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script> |
| </div> |
| <!--+ |
| |header |
| +--> |
| <div class="header"> |
| <!--+ |
| |start group logo |
| +--> |
| <div class="grouplogo"> |
| <a href=""><img class="logoImage" alt="Lenya" src="../../../../images/apache-lenya-light.png" title=""></a> |
| </div> |
| <!--+ |
| |end group logo |
| +--> |
| <!--+ |
| |start Project Logo |
| +--> |
| <div class="projectlogo"> |
| <a href=""></a> |
| </div> |
| <!--+ |
| |end Project Logo |
| +--> |
| <!--+ |
| |start Search |
| +--> |
| <div class="searchbox"> |
| <form action="http://www.google.com/search" method="get" class="roundtopsmall"> |
| <input value="lenya.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with ');" size="25" name="q" id="query" type="text" value="Search the site with "> |
| <input name="Search" value="Search" type="submit"> |
| </form> |
| </div> |
| <!--+ |
| |end search |
| +--> |
| <!--+ |
| |start Tabs |
| +--> |
| <ul id="tabs"> |
| <li> |
| <a class="unselected" href="../../../../index.html">Project</a> |
| </li> |
| <li> |
| <a class="unselected" href="../../../../docs/index.html">Developer</a> |
| </li> |
| <li> |
| <a class="unselected" href="../../../../community/index.html">Community</a> |
| </li> |
| <li> |
| <a class="unselected" href="../../../../docs/2_0_x/index.html">Version 2.0</a> |
| </li> |
| <li class="current"> |
| <a class="selected" href="../../../../docs/1_2_x/index.html">Version 1.2</a> |
| </li> |
| </ul> |
| <!--+ |
| |end Tabs |
| +--> |
| </div> |
| </div> |
| <div id="main"> |
| <div id="publishedStrip"> |
| <!--+ |
| |start Subtabs |
| +--> |
| <div id="level2tabs"></div> |
| <!--+ |
| |end Subtabs |
| +--> |
| <script type="text/javascript"><!-- |
| document.write("Last Published: " + document.lastModified); |
| // --></script> |
| </div> |
| <!--+ |
| |breadtrail |
| +--> |
| <div class="breadtrail"> |
| |
| |
| </div> |
| <!--+ |
| |start Menu, mainarea |
| +--> |
| <!--+ |
| |start Menu |
| +--> |
| <div id="menu"> |
| <div onclick="SwitchMenu('menu_selected_1.1', '../../../../skin/')" id="menu_selected_1.1Title" class="menutitle" style="background-image: url('../../../../skin/images/chapter_open.gif');">Version 1.2</div> |
| <div id="menu_selected_1.1" class="selectedmenuitemgroup" style="display: block;"> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/index.html">Index</a> |
| </div> |
| <div onclick="SwitchMenu('menu_1.1.2', '../../../../skin/')" id="menu_1.1.2Title" class="menutitle">Installation</div> |
| <div id="menu_1.1.2" class="menuitemgroup"> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/installation/index.html">Download</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/installation/subversion.html">Subversion Access</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/installation/source_version.html">Install Instructions</a> |
| </div> |
| </div> |
| <div onclick="SwitchMenu('menu_1.1.3', '../../../../skin/')" id="menu_1.1.3Title" class="menutitle">Tutorial</div> |
| <div id="menu_1.1.3" class="menuitemgroup"> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/tutorial/index.html">Introduction</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/tutorial/understanding_lenya.html">1. Understanding Lenya</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/tutorial/installing_lenya.html">2. Installing Lenya</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/tutorial/anatomy_of_the_pipeline.html">3. Anatomy of the Pipeline</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/tutorial/editing_in_lenya.html">4. Editing in Lenya</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/tutorial/custom_navigation.html">5. Custom Navigation in Lenya</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/tutorial/mod_proxy_and_lenya.html">6a. Mod Proxy and Lenya</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/tutorial/mod_proxy_and_lenya_continued.html">6b. Mod Proxy and Lenya</a> |
| </div> |
| </div> |
| <div onclick="SwitchMenu('menu_1.1.4', '../../../../skin/')" id="menu_1.1.4Title" class="menutitle">How-To</div> |
| <div id="menu_1.1.4" class="menuitemgroup"> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/how-to/faq.html">FAQ</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/how-to/new_publication.html">New Publication</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/how-to/deploy_publication.html">Deploy Publication</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/how-to/look_and_feel.html">Look and Feel</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/how-to/custom_resourcetype.html">Custom Resource Type</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/how-to/new_mime_type.html">Adding Mime Types</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/how-to/site_navigation.html">Site Navigation</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/how-to/ldap_authentication.html">LDAP Authentication</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/how-to/cms_menus.html">CMS Menus</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/how-to/cms_screens.html">CMS Screens</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/how-to/search.html">Search Publications</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/how-to/external_data.html">External Data</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/how-to/unittests.html">Unit Tests</a> |
| </div> |
| </div> |
| <div onclick="SwitchMenu('menu_selected_1.1.5', '../../../../skin/')" id="menu_selected_1.1.5Title" class="menutitle" style="background-image: url('../../../../skin/images/chapter_open.gif');">Components</div> |
| <div id="menu_selected_1.1.5" class="selectedmenuitemgroup" style="display: block;"> |
| <div onclick="SwitchMenu('menu_1.1.5.1', '../../../../skin/')" id="menu_1.1.5.1Title" class="menutitle">Access Control</div> |
| <div id="menu_1.1.5.1" class="menuitemgroup"> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/accesscontrol/terms.html">Basic Terms</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/accesscontrol/accesscontrollers.html">Access Controllers</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/accesscontrol/accesscontrollerresolvers.html">Access Controller Resolvers</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/accesscontrol/authenticators.html">Authenticators</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/accesscontrol/authorizers.html">Authorizers</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/accesscontrol/policymanagers.html">Policies and Policy Managers</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/accesscontrol/accreditablemanagers.html">Accreditable Managers</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/accesscontrol/ssl.html">SSL Encryption</a> |
| </div> |
| </div> |
| <div onclick="SwitchMenu('menu_1.1.5.2', '../../../../skin/')" id="menu_1.1.5.2Title" class="menutitle">Authoring</div> |
| <div id="menu_1.1.5.2" class="menuitemgroup"> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/authoring/adding-document-creator.html">Adding a new document creator</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/authoring/image-upload.html">Image Upload</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/authoring/openoffice.html">OpenOffice</a> |
| </div> |
| </div> |
| <div onclick="SwitchMenu('menu_1.1.5.3', '../../../../skin/')" id="menu_1.1.5.3Title" class="menutitle">Deployment</div> |
| <div id="menu_1.1.5.3" class="menuitemgroup"> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/deployment/proxying.html">Proxying</a> |
| </div> |
| </div> |
| <div onclick="SwitchMenu('menu_1.1.5.4', '../../../../skin/')" id="menu_1.1.5.4Title" class="menutitle">Editors</div> |
| <div id="menu_1.1.5.4" class="menuitemgroup"> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/editors/htmlform.html">HTML Form Editor</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/editors/1form.html">HTML One Form Editor</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/editors/bxe.html">Bitflux Editor</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/editors/kupu.html">Kupu</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/editors/xopus.html">Xopus</a> |
| </div> |
| </div> |
| <div onclick="SwitchMenu('menu_1.1.5.5', '../../../../skin/')" id="menu_1.1.5.5Title" class="menutitle">Layout</div> |
| <div id="menu_1.1.5.5" class="menuitemgroup"> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/layout/navigation.html">Navigation</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/layout/xhtml-templating.html">XHTML templating</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/layout/static-resources.html">Serving static resources</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/layout/lenya-menubar.html">Lenya Menubar</a> |
| </div> |
| </div> |
| <div onclick="SwitchMenu('menu_1.1.5.6', '../../../../skin/')" id="menu_1.1.5.6Title" class="menutitle">Publication</div> |
| <div id="menu_1.1.5.6" class="menuitemgroup"> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/publication/pageenvelopemodule.html">PageEnvelopeModule</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/publication/siteTree.html">Site tree</a> |
| </div> |
| </div> |
| <div onclick="SwitchMenu('menu_1.1.5.7', '../../../../skin/')" id="menu_1.1.5.7Title" class="menutitle">Resource Types</div> |
| <div id="menu_1.1.5.7" class="menuitemgroup"> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/resource-types/resource-types.html">Resource Types</a> |
| </div> |
| </div> |
| <div onclick="SwitchMenu('menu_1.1.5.8', '../../../../skin/')" id="menu_1.1.5.8Title" class="menutitle">Revision Control</div> |
| <div id="menu_1.1.5.8" class="menuitemgroup"> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/revisioncontrol/revisioncontroller.html">Revision Controller</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/revisioncontrol/rcml.html">RCML</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/revisioncontrol/checkin.html">Check In</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/revisioncontrol/checkout.html">Check Out</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/revisioncontrol/version.html">Revisions</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/revisioncontrol/rollback.html">Rollback</a> |
| </div> |
| </div> |
| <div onclick="SwitchMenu('menu_1.1.5.9', '../../../../skin/')" id="menu_1.1.5.9Title" class="menutitle">Repository</div> |
| <div id="menu_1.1.5.9" class="menuitemgroup"> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/repository/index.html">WebDAV Servers</a> |
| </div> |
| </div> |
| <div onclick="SwitchMenu('menu_selected_1.1.5.10', '../../../../skin/')" id="menu_selected_1.1.5.10Title" class="menutitle" style="background-image: url('../../../../skin/images/chapter_open.gif');">Search</div> |
| <div id="menu_selected_1.1.5.10" class="selectedmenuitemgroup" style="display: block;"> |
| <div class="menupage"> |
| <div class="menupagetitle">Searching with Lucene</div> |
| </div> |
| </div> |
| <div onclick="SwitchMenu('menu_1.1.5.11', '../../../../skin/')" id="menu_1.1.5.11Title" class="menutitle">URI Handling</div> |
| <div id="menu_1.1.5.11" class="menuitemgroup"> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/uri-handling/URIParametrizer.html">URI Parametrizer</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/uri-handling/standardURI.html">URI Scheme</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/uri-handling/usecases.html">Usecases</a> |
| </div> |
| </div> |
| <div onclick="SwitchMenu('menu_1.1.5.12', '../../../../skin/')" id="menu_1.1.5.12Title" class="menutitle">Asset Management</div> |
| <div id="menu_1.1.5.12" class="menuitemgroup"> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/asset-management/management.html">Asset Management</a> |
| </div> |
| </div> |
| <div onclick="SwitchMenu('menu_1.1.5.13', '../../../../skin/')" id="menu_1.1.5.13Title" class="menutitle">Link Management</div> |
| <div id="menu_1.1.5.13" class="menuitemgroup"> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/link-management/link-management.html">Link Management</a> |
| </div> |
| </div> |
| <div onclick="SwitchMenu('menu_1.1.5.14', '../../../../skin/')" id="menu_1.1.5.14Title" class="menutitle">Meta Data</div> |
| <div id="menu_1.1.5.14" class="menuitemgroup"> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/metadata/metadata.html">Meta Data Handling</a> |
| </div> |
| </div> |
| <div onclick="SwitchMenu('menu_1.1.5.15', '../../../../skin/')" id="menu_1.1.5.15Title" class="menutitle">Multilingual Documents</div> |
| <div id="menu_1.1.5.15" class="menuitemgroup"> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/multilingual/multilingual.html">Multilingual Document Handling</a> |
| </div> |
| </div> |
| <div onclick="SwitchMenu('menu_1.1.5.16', '../../../../skin/')" id="menu_1.1.5.16Title" class="menutitle">Tasks</div> |
| <div id="menu_1.1.5.16" class="menuitemgroup"> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/tasks/concept.html">The Task Concept</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/tasks/defining.html">Defining Tasks</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/tasks/taskaction.html">The TaskAction</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/tasks/scheduling.html">Scheduling a Task</a> |
| </div> |
| <div onclick="SwitchMenu('menu_1.1.5.16.5', '../../../../skin/')" id="menu_1.1.5.16.5Title" class="menutitle">Included tasks</div> |
| <div id="menu_1.1.5.16.5" class="menuitemgroup"> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/tasks/anttask.html">AntTask</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/tasks/publisher.html">DefaultFilePublisher</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/tasks/exporter.html">StaticHTMLExporter</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/tasks/mailtask.html">MailTask</a> |
| </div> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/tasks/development.html">Developing Tasks</a> |
| </div> |
| </div> |
| <div onclick="SwitchMenu('menu_1.1.5.17', '../../../../skin/')" id="menu_1.1.5.17Title" class="menutitle">Workflow</div> |
| <div id="menu_1.1.5.17" class="menuitemgroup"> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/workflow/terms.html">Terms</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/workflow/state-machine.html">The State Machine</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/workflow/configuration.html">Configuration</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/workflow/implementation.html">Implementation</a> |
| </div> |
| </div> |
| <div onclick="SwitchMenu('menu_1.1.5.18', '../../../../skin/')" id="menu_1.1.5.18Title" class="menutitle">Site Management</div> |
| <div id="menu_1.1.5.18" class="menuitemgroup"> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/sitemanagement/archive.html">Archive</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/sitemanagement/copy.html">Copy</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/sitemanagement/deactivate.html">Deactivate</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/sitemanagement/delete.html">Delete</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/sitemanagement/deletetrash.html">Delete the trash</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/sitemanagement/move.html">Move</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/sitemanagement/moveupdown.html">Move Up/Down</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/sitemanagement/rename.html">Rename</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/components/sitemanagement/restore.html">Restore</a> |
| </div> |
| </div> |
| </div> |
| <div onclick="SwitchMenu('menu_1.1.6', '../../../../skin/')" id="menu_1.1.6Title" class="menutitle">Miscellaneous</div> |
| <div id="menu_1.1.6" class="menuitemgroup"> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/misc/namespaces.html">Namespaces</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/misc/reserved_names.html">Reserved Names</a> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../docs/1_2_x/misc/xinclude-processor.html">XInclude Processor</a> |
| </div> |
| </div> |
| <div class="menuitem"> |
| <a href="../../../../apidocs/1.2/index.html">1.2 API (Javadoc)</a> |
| </div> |
| </div> |
| <div id="credit"></div> |
| <div id="roundbottom"> |
| <img style="display: none" class="corner" height="15" width="15" alt="" src="../../../../skin/images/rc-b-l-15-1body-2menu-3menu.png"></div> |
| <!--+ |
| |alternative credits |
| +--> |
| <div id="credit2"> |
| <a href="http://wiki.apache.org/lenya/MeetingFreiburg2008"><img border="0" title="Meeting Freiburg 2008" alt="Meeting Freiburg 2008 - logo" src="../../../../images/apache-lenya-meeting-freiburg-2008.png" style="width: 160px;height: 40px;"></a> |
| </div> |
| </div> |
| <!--+ |
| |end Menu |
| +--> |
| <!--+ |
| |start content |
| +--> |
| <div id="content"> |
| <div title="Portable Document Format" class="pdflink"> |
| <a class="dida" href="lucene.pdf"><img alt="PDF -icon" src="../../../../skin/images/pdfdoc.gif" class="skin"><br> |
| PDF</a> |
| </div> |
| <h1>Lucene</h1> |
| <div id="front-matter"> |
| <div id="minitoc-area"> |
| <ul class="minitoc"> |
| <li> |
| <a href="#Overview">Overview</a> |
| </li> |
| <li> |
| <a href="#Crawling+a+website">Crawling a website</a> |
| </li> |
| <li> |
| <a href="#Creating+an+index+from+the+command+line">Creating an index from the command line</a> |
| </li> |
| <li> |
| <a href="#Indexing+XML+documents">Indexing XML documents</a> |
| </li> |
| <li> |
| <a href="#Extract+text+from+a+PDF+document">Extract text from a PDF document</a> |
| </li> |
| </ul> |
| </div> |
| </div> |
| |
| |
| <a name="N1000E"></a><a name="Overview"></a> |
| <h2 class="h3">Overview</h2> |
| <div class="section"> |
| <p>There are two URLs for the search screen relative to your publication: |
| <span class="codefrag">search-live/lucene</span> to search the live area, <span class="codefrag">search-authoring/lucene</span> to |
| search the authoring area of your publication.</p> |
| <p>If you want to customize the layout of the search screen for your publication, |
| place a stylesheet at <span class="codefrag">lenya/xslt/search/search-and-results.xsl</span> |
| relative to your publication root.</p> |
| <p>Lucene indices are stored within the <span class="codefrag">work/search/index/$AREA/index</span> directory of your |
| publication. The <span class="codefrag">work/search/htdocs_dump/$AREA</span> directory holds content from crawling (see below).</p> |
| <p>The search pipelines are defined within <span class="codefrag">global-sitemap.xmap</span> and <span class="codefrag">lucene.xmap</span>. |
| </p> |
| </div> |
| |
| |
| <a name="N10035"></a><a name="Crawling+a+website"></a> |
| <h2 class="h3">Crawling a website</h2> |
| <div class="section"> |
| <p> |
| Crawl a website by running |
| </p> |
| <pre class="code"> |
| |
| ant -f build/lenya/webapp/lenya/bin/crawl_and_index.xml -Dcrawler.xconf=build/lenya/webapp/lenya/pubs/default/config/search/crawler-live.xconf crawl |
| |
| </pre> |
| <p> |
| Note that there is a search.properties file in build/lenya/webapp/lenya/bin that you may have to change. |
| crawler.xconf needs to have the following elements: |
| </p> |
| <pre class="code"> |
| |
| &lt;crawler&gt; |
| &lt;user-agent&gt;lenya&lt;/user-agent&gt; |
| |
| &lt;base-url href="http://lenya.apache.org/index.html"/&gt; |
| &lt;scope-url href="http://lenya.apache.org/"/&gt; |
| |
| &lt;uri-list src="work/search/lucene/uris.txt"/&gt; |
| &lt;htdocs-dump-dir src="work/search/lucene/htdocs_dump/lenya.apache.org"/&gt; |
| |
| &lt;!-- &lt;robots src="robots.txt" domain="lenya.apache.org"/&gt; --&gt; |
| &lt;/crawler&gt; |
| |
| </pre> |
| <ul> |
| |
| <li>user-agent is the HTTP user agent that will be used for the crawler</li> |
| |
| <li>base-url is the start URL for the crawler</li> |
| |
| <li>scope-url limits the scope of the crawl to that site, or subdirectory</li> |
| |
| <li>uri-list is a reference to a file that will contain all URLs found during the crawl</li> |
| |
| <li>htdocs-dump-dir specifies the directory that will contain the crawled site</li> |
| |
| <li>robots specifies an (optional) robots file that follows the <a href="http://www.robotstxt.org/wc/norobots.html">Robot Exclusion Standard</a> |
| </li> |
| |
| </ul> |
| <p> |
| If you want to fine-tune the crawling (and do not have access to the remote server to put a robots.txt there), then |
| you can specify exclusions in a local robots.txt file: |
| </p> |
| <pre class="code"> |
| |
| # lenya.apache.org |
| |
| User-agent: * |
| Disallow: /there_seems_to_be_a_bug_within_websphinx_Robot_Exclusion.html |
| |
| #Disallow: |
| |
| User-agent: lenya |
| Disallow: /do/not/crawl/this/page.html |
| |
| </pre> |
| </div> |
| |
| |
| <a name="N10069"></a><a name="Creating+an+index+from+the+command+line"></a> |
| <h2 class="h3">Creating an index from the command line</h2> |
| <div class="section"> |
| <pre class="code"> |
| |
| ant -f build/lenya/webapp/lenya/bin/crawl_and_index.xml -Dlucene.xconf=build/lenya/webapp/lenya/pubs/default/config/search/lucene-live.xconf index |
| |
| </pre> |
| <p> |
| Note that there is a search.properties file in build/lenya/webapp/lenya/bin that you may have to change. |
| lucene-live.xconf has the following elements |
| </p> |
| <pre class="code"> |
| |
| &lt;lucene&gt; |
| &lt;update-index type="new"/&gt; |
| &lt;!-- |
| &lt;update-index type="incremental"/&gt; |
| --&gt; |
| |
| &lt;index-dir src="../../work/search/lucene/index/index"/&gt; |
| &lt;htdocs-dump-dir src="../../work/search/lucene/htdocs_dump"/&gt; |
| |
| &lt;indexer class="org.apache.lenya.lucene.index.DefaultIndexer"/&gt; |
| &lt;/lucene&gt; |
| |
| </pre> |
| </div> |
| |
| |
| <a name="N1007B"></a><a name="Indexing+XML+documents"></a> |
| <h2 class="h3">Indexing XML documents</h2> |
| <div class="section"> |
| <p> |
| In order to index XML documents one needs to configure the <span class="codefrag">org.apache.lenya.lucene.index.ConfigurableIndexer</span> (see above). |
| </p> |
| <p> |
| With namespaces: |
| </p> |
| <pre class="code"> |
| |
| &lt;?xml version="1.0"?&gt; |
| |
| &lt;luc:document xmlns:luc="http://apache.org/cocoon/lenya/lucene/1.0"&gt; |
| &lt;luc:field name="currwfstate" type="Text" xpath="/wf:history/wf:version[last()]/@state"&gt; |
| &lt;namespace prefix="wf"&gt;http://apache.org/cocoon/lenya/workflow/1.0&lt;/namespace&gt; |
| &lt;/luc:field&gt; |
| &lt;/luc:document&gt; |
| |
| </pre> |
| <p> |
| Concatenating element values and setting default values in case an element value doesn't exist: |
| </p> |
| <pre class="code"> |
| |
| &lt;?xml version="1.0"?&gt; |
| |
| &lt;luc:document xmlns:luc="http://apache.org/cocoon/lenya/lucene/1.0"&gt; |
| &lt;luc:field name="title" type="Text" xpath="/article/head/title"/&gt; |
| &lt;luc:field name="subtitle" type="Text" xpath="/article/head/subtitle"/&gt; |
| &lt;luc:field name="lead" type="UnStored" xpath="/article/head/abstract"/&gt; |
| &lt;luc:field name="contents" type="UnStored" xpath="/"/&gt; |
| &lt;luc:field name="author" type="UnStored"&gt; |
| &lt;namespace prefix="lenya"&gt;http://apache.org/cocoon/lenya/page-envelope/1.0&lt;/namespace&gt; |
| &lt;namespace prefix="dc"&gt;http://purl.org/dc/elements/1.1/&lt;/namespace&gt; |
| &lt;xpath&gt;/*/lenya:meta/dc:contributor&lt;/xpath&gt; |
| &lt;/luc:field&gt; |
| &lt;luc:field name="date" type="Text"&gt; |
| &lt;namespace prefix="lenya"&gt;http://apache.org/cocoon/lenya/page-envelope/1.0&lt;/namespace&gt; |
| &lt;xpath default="1969"&gt;/*/lenya:meta/year&lt;/xpath&gt;&lt;text&gt;.&lt;/text&gt;&lt;xpath default="02"&gt;/*/lenya:meta/month&lt;/xpath&gt;&lt;text&gt;.&lt;/text&gt;&lt;xpath default="16"&gt;/*/lenya:meta/day&lt;/xpath&gt; |
| &lt;/luc:field&gt; |
| &lt;/luc:document&gt; |
| |
| </pre> |
| </div> |
| |
| |
| <a name="N10096"></a><a name="Extract+text+from+a+PDF+document"></a> |
| <h2 class="h3">Extract text from a PDF document</h2> |
| <div class="section"> |
| <pre class="code"> |
| |
| ant -f build/lenya/webapp/lenya/bin/crawl_and_index.xml -Dhtdocs.dump.dir=build/lenya/webapp/lenya/pubs/default/work/search/lucene/htdocs_dump xpdf |
| |
| </pre> |
| <p> |
| Also see the targets <span class="codefrag">pdfbox</span> and <span class="codefrag">pdfadobe</span>. |
| </p> |
| </div> |
| |
| |
| </div> |
| <!--+ |
| |end content |
| +--> |
| <div class="clearboth"> </div> |
| </div> |
| <div id="footer"> |
| <!--+ |
| |start bottomstrip |
| +--> |
| <div class="lastmodified"> |
| <script type="text/javascript"><!-- |
| document.write("Last Published: " + document.lastModified); |
| // --></script> |
| </div> |
| <div class="copyright"> |
| Copyright © |
| 2002-2007 <a href="http://www.apache.org/licenses/LICENSE-2.0">The Apache Software Foundation.</a> |
| </div> |
| <div id="feedback"> |
| Send feedback about the website to: |
| <a id="feedbackto" href="mailto:dev@lenya.apache.org?subject=Feedback%C2%A0for%C2%A0docs/1_2_x/components/search/lucene.html">dev@lenya.apache.org</a> |
| </div> |
| <!--+ |
| |end bottomstrip |
| +--> |
| </div> |
| </body> |
| </html> |