blob: 0aa7c989598a8856d5554012de60e5778f373ba3 [file] [log] [blame]
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<!--+
|document metadata: charset declaration first, then generator details
+-->
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta name="Generator" content="Apache Forrest">
<meta name="Forrest-version" content="0.8">
<meta name="Forrest-skin-name" content="lucene">
<title>Welcome to Nutch!</title>
<!--+
|skin stylesheets, kept in their original order (basic, screen, print, profile)
+-->
<link rel="stylesheet" type="text/css" href="skin/basic.css">
<link rel="stylesheet" type="text/css" media="screen" href="skin/screen.css">
<link rel="stylesheet" type="text/css" media="print" href="skin/print.css">
<link rel="stylesheet" type="text/css" href="skin/profile.css">
<!--+
|skin scripts, kept in their original order; the body's onload handler,
|the search box and the menu call functions that are presumably defined here
+-->
<script type="text/javascript" language="javascript" src="skin/getBlank.js"></script>
<script type="text/javascript" language="javascript" src="skin/getMenu.js"></script>
<script type="text/javascript" language="javascript" src="skin/fontsize.js"></script>
<link rel="shortcut icon" href="images/favicon.ico">
</head>
<body onload="init()">
<script type="text/javascript">ndeSetTextSize();</script>
<div id="top">
<!--+
|breadtrail
+-->
<div class="breadtrail">
<a href="http://www.apache.org/">Apache</a> &gt; <a href="http://lucene.apache.org/">Lucene</a> &gt; <a href="http://lucene.apache.org/nutch/">Nutch</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
</div>
<!--+
|header
+-->
<div class="header">
<!--+
|start group logo
+-->
<div class="grouplogo">
<a href="http://lucene.apache.org/"><img class="logoImage" alt="Lucene" src="images/lucene_green_150.gif" title="Apache Lucene"></a>
</div>
<!--+
|end group logo
+-->
<!--+
|start Project Logo
+-->
<div class="projectlogo">
<a href="http://lucene.apache.org/nutch/"><img class="logoImage" alt="Nutch" src="images/nutch-logo.gif" title="Open Source Web Search Software"></a>
</div>
<!--+
|end Project Logo
+-->
<!--+
|start Search
+-->
<div class="searchbox">
<!--+
|Site search form: sends the text field as parameter "q" with GET to the
|externally hosted search at search.lucidimagination.com.
+-->
<form action="http://search.lucidimagination.com/p:nutch" method="get" class="roundtopsmall">
<!--+
|The hint text is the field's initial value; on focus, getBlank() is called
|with the field and that same hint text (presumably to clear it; the
|function lives in the skin scripts). The field has no label element, so
|the title attribute supplies its accessible name.
+-->
<input onFocus="getBlank (this, 'Search the site with Solr');" size="25" name="q" id="query" type="text" value="Search the site with Solr" title="Search the site">&nbsp;
<input name="Search" value="Search" type="submit">
</form>
<div style="position: relative; top: -5px; left: -10px">Powered by <a href="http://www.lucidimagination.com" style="color: #033268">Lucid Imagination</a>
</div>
</div>
<!--+
|end search
+-->
<!--+
|start Tabs
+-->
<!--+
|Top-level tabs. The tab for this page is marked with class "current" on
|its list item and class "selected" on its link; all other tabs use class
|"unselected" and point to external sites (wiki and issue tracker).
+-->
<ul id="tabs">
<li class="current">
<a class="selected" href="index.html">Main</a>
</li>
<li>
<a class="unselected" href="http://wiki.apache.org/nutch/">Wiki</a>
</li>
<li>
<a class="unselected" href="http://issues.apache.org/jira/browse/Nutch">Jira</a>
</li>
</ul>
<!--+
|end Tabs
+-->
</div>
</div>
<div id="main">
<div id="publishedStrip">
<!--+
|start Subtabs
+-->
<div id="level2tabs"></div>
<!--+
|end Subtabs
+-->
<!--+
|Writes the text "Last Published: " followed by the browser's
|document.lastModified value into the page at this position. The script
|body is wrapped in an HTML comment, the usual way of hiding inline script
|from browsers that do not support scripting.
+-->
<script type="text/javascript"><!--
document.write("Last Published: " + document.lastModified);
// --></script>
</div>
<!--+
|breadtrail
+-->
<div class="breadtrail">
&nbsp;
</div>
<!--+
|start Menu, mainarea
+-->
<!--+
|start Menu
+-->
<div id="menu">
<!--+
|Menu group "Project". Clicking the title calls SwitchMenu() with the id of
|the group container and the skin path; the title's own id is the group id
|with "Title" appended. This group is rendered expanded (inline
|"display: block" and the chapter_open.gif background on the title).
|"News" is shown as a plain menupagetitle rather than a link, presumably
|because it is the page being viewed.
+-->
<div onclick="SwitchMenu('menu_selected_1.1', 'skin/')" id="menu_selected_1.1Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">Project</div>
<div id="menu_selected_1.1" class="selectedmenuitemgroup" style="display: block;">
<div class="menupage">
<div class="menupagetitle">News</div>
</div>
<div class="menuitem">
<a href="about.html">About</a>
</div>
<div class="menuitem">
<a href="credits.html">Credits</a>
</div>
<div class="menuitem">
<a href="http://www.cafepress.com/nutch/">Buy Stuff</a>
</div>
</div>
<!--+
|Menu group "Documentation". Clicking the title calls SwitchMenu() with the
|id of the group container and the skin path; the title's own id is the
|group id with "Title" appended. Unlike the "Project" group, this one has
|no inline display style, so its initial visibility is left to the skin.
|API docs are linked per release; the nightly entry points into a Hudson
|job workspace on lucene.zones.apache.org:8080.
+-->
<div onclick="SwitchMenu('menu_1.2', 'skin/')" id="menu_1.2Title" class="menutitle">Documentation</div>
<div id="menu_1.2" class="menuitemgroup">
<div class="menuitem">
<a href="http://wiki.apache.org/nutch/FAQ">FAQ</a>
</div>
<div class="menuitem">
<a href="http://wiki.apache.org/nutch/">Wiki</a>
</div>
<div class="menuitem">
<a href="tutorial.html">Tutorial (0.7.2)</a>
</div>
<div class="menuitem">
<a href="tutorial8.html">Tutorial (0.8.x)</a>
</div>
<div class="menuitem">
<a href="bot.html">Robot</a>
</div>
<div class="menuitem">
<a href="i18n.html">i18n</a>
</div>
<div class="menuitem">
<a href="apidocs-1.0/index.html">API Docs (1.0)</a>
</div>
<div class="menuitem">
<a href="apidocs-0.9/index.html">API Docs (0.9)</a>
</div>
<div class="menuitem">
<a href="apidocs-0.8.x/index.html">API Docs (0.8.x)</a>
</div>
<div class="menuitem">
<a href="apidocs/index.html">API Docs (0.7.2)</a>
</div>
<div class="menuitem">
<a href="http://lucene.zones.apache.org:8080/hudson/job/Nutch-Nightly/ws/trunk/build/docs/api/index.html">API Docs (nightly)</a>
</div>
</div>
<!--+
|Menu group "Resources". Clicking the title calls SwitchMenu() with the id
|of the group container and the skin path; the title's own id is the group
|id with "Title" appended. All entries are site-relative links.
+-->
<div onclick="SwitchMenu('menu_1.3', 'skin/')" id="menu_1.3Title" class="menutitle">Resources</div>
<div id="menu_1.3" class="menuitemgroup">
<div class="menuitem">
<a href="release/">Download</a>
</div>
<div class="menuitem">
<a href="nightly.html">Nightly builds</a>
</div>
<div class="menuitem">
<a href="mailing_lists.html">Mailing Lists</a>
</div>
<div class="menuitem">
<a href="issue_tracking.html">Issue Tracking</a>
</div>
<div class="menuitem">
<a href="version_control.html">Version Control</a>
</div>
</div>
<!--+
|Menu group "Related Projects". Clicking the title calls SwitchMenu() with
|the id of the group container and the skin path; the title's own id is
|the group id with "Title" appended. Links go to each project's current
|home: Hadoop is a top-level Apache project and Solr has graduated from
|the Incubator into Lucene, so neither uses its former location.
+-->
<div onclick="SwitchMenu('menu_1.4', 'skin/')" id="menu_1.4Title" class="menutitle">Related Projects</div>
<div id="menu_1.4" class="menuitemgroup">
<div class="menuitem">
<a href="http://lucene.apache.org/java/">Lucene Java</a>
</div>
<div class="menuitem">
<a href="http://hadoop.apache.org/">Hadoop</a>
</div>
<div class="menuitem">
<a href="http://lucene.apache.org/solr/">Solr</a>
</div>
</div>
<div id="credit">
<hr>
<a href="http://forrest.apache.org/"><img border="0" title="Built with Apache Forrest" alt="Built with Apache Forrest - logo" src="images/built-with-forrest-button.png" style="width: 88px;height: 31px;"></a>
</div>
<div id="roundbottom">
<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
<!--+
|alternative credits
+-->
<div id="credit2"></div>
</div>
<!--+
|end Menu
+-->
<!--+
|start content
+-->
<div id="content">
<div title="Portable Document Format" class="pdflink">
<a class="dida" href="index.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
PDF</a>
</div>
<h1>Welcome to Nutch!</h1>
<!--+
|In-page table of contents for the News section. Each href fragment is
|identical to the name of the named anchor placed in front of the matching
|heading further down, including its escaped characters (for example %3A
|for a colon, and %0A%09%09%09 where the Amsterdam title contains a line
|break). When a heading is changed, the fragment here and the anchor name
|must be changed together.
+-->
<div id="minitoc-area">
<ul class="minitoc">
<li>
<a href="#News">News</a>
<ul class="minitoc">
<li>
<a href="#14+August+2009+-+Lucene+at+US+ApacheCon">14 August 2009 - Lucene at US ApacheCon</a>
</li>
<li>
<a href="#23+March+2009+-+Apache+Nutch+1.0+Released">23 March 2009 - Apache Nutch 1.0 Released</a>
</li>
<li>
<a href="#09+February+2009+-+Lucene+at+ApacheCon+Europe+2009+in%0A%09%09%09Amsterdam">09 February 2009 - Lucene at ApacheCon Europe 2009 in
Amsterdam</a>
</li>
<li>
<a href="#2+April+2007%3A+Nutch+0.9+Released">2 April 2007: Nutch 0.9 Released</a>
</li>
<li>
<a href="#24+September+2006%3A+Nutch+0.8.1+Released">24 September 2006: Nutch 0.8.1 Released</a>
</li>
<li>
<a href="#25+July+2006%3A+Nutch+0.8+Released">25 July 2006: Nutch 0.8 Released</a>
</li>
<li>
<a href="#31+March+2006%3A+Nutch+0.7.2+Released">31 March 2006: Nutch 0.7.2 Released</a>
</li>
<li>
<a href="#1+October+2005%3A+Nutch+0.7.1+Released">1 October 2005: Nutch 0.7.1 Released</a>
</li>
<li>
<a href="#17+August+2005%3A+Nutch+0.7+Released">17 August 2005: Nutch 0.7 Released</a>
</li>
<li>
<a href="#June+2005%3A+Nutch+graduates+from+Incubator">June 2005: Nutch graduates from Incubator</a>
</li>
<li>
<a href="#January+2005%3A+Nutch+Joins+Apache+Incubator">January 2005: Nutch Joins Apache Incubator</a>
</li>
<li>
<a href="#September+2004%3A+Creative+Commons+launches+Nutch-based+Search">September 2004: Creative Commons launches Nutch-based Search</a>
</li>
<li>
<a href="#September+2004%3A+Oregon+State+University+switches+to+Nutch">September 2004: Oregon State University switches to Nutch</a>
</li>
</ul>
</li>
</ul>
</div>
<a name="N1000D"></a><a name="News"></a>
<h2 class="h3">News</h2>
<div class="section">
<a name="N10013"></a><a name="14+August+2009+-+Lucene+at+US+ApacheCon"></a>
<h3 class="h4">14 August 2009 - Lucene at US ApacheCon</h3>
<p>
<a href="http://www.us.apachecon.com/c/acus2009/" title="ApacheCon US 2009">
<img alt="ApacheCon Logo" class="float-right" src="http://www.apache.org/events/current-event-125x125.png">
</a>
ApacheCon US is once again in the Bay Area and Lucene is coming
along for the ride! The Lucene community has planned two full
days of talks, plus a meetup and the usual bevy of training.
With a well-balanced mix of first time and veteran ApacheCon
speakers, the
<a href="http://www.us.apachecon.com/c/acus2009/schedule#lucene">Lucene track</a>
at ApacheCon US promises to have something for everyone. Be sure
not to miss:
</p>
<p> Training:</p>
<ul>
<li>
<a href="http://www.us.apachecon.com/c/acus2009/sessions/437">Lucene Boot Camp</a>
- A two day training session, Nov. 2nd &amp; 3rd
</li>
<li>
<a href="http://www.us.apachecon.com/c/acus2009/sessions/375">Solr Day</a>
- A one day training session, Nov. 2nd
</li>
</ul>
<p>Thursday, Nov. 5th</p>
<ul>
<li>
<a href="http://www.us.apachecon.com/c/acus2009/sessions/428">Introduction to the Lucene Ecosystem
</a>
- Grant Ingersoll @ 9:00
</li>
<li>
<a href="http://www.us.apachecon.com/c/acus2009/sessions/461">Lucene Basics and New Features</a>
- Michael Busch @ 10:00
</li>
<li>
<a href="http://www.us.apachecon.com/c/acus2009/sessions/331">Apache Solr: Out of the Box</a>
- Chris Hostetter @ 14:00
</li>
<li>
<a href="http://www.us.apachecon.com/c/acus2009/sessions/427">Introduction to Nutch</a>
- Andrzej Bialecki @ 15:00
</li>
<li>
<a href="http://www.us.apachecon.com/c/acus2009/sessions/430">Lucene and Solr Performance Tuning</a>
- Mark Miller @ 16:30
</li>
</ul>
<p>Friday, Nov. 6th</p>
<ul>
<li>
<a href="http://www.us.apachecon.com/c/acus2009/sessions/332">Implementing an Information Retrieval
Framework for an Organizational Repository</a>
- Sithu D Sudarsan @ 9:00
</li>
<li>
<a href="http://www.us.apachecon.com/c/acus2009/sessions/333">Apache Mahout - Going from raw data to
Information</a>
- Isabel Drost @ 10:00
</li>
<li>
<a href="http://www.us.apachecon.com/c/acus2009/sessions/334">MIME Magic with Apache Tika</a>
- Jukka Zitting @ 11:30
</li>
<li>
<a href="http://www.us.apachecon.com/c/acus2009/sessions/335">Building Intelligent Search Applications
with the Lucene Ecosystem</a>
- Ted Dunning @ 14:00
</li>
<li>
<a href="http://www.us.apachecon.com/c/acus2009/sessions/462">Realtime Search</a>
- Jason Rutherglen @ 15:00
</li>
</ul>
<a name="N10091"></a><a name="23+March+2009+-+Apache+Nutch+1.0+Released"></a>
<h3 class="h4">23 March 2009 - Apache Nutch 1.0 Released</h3>
<p>The 1.0 release of Nutch is now available. This release includes several major feature improvements,
such as a new indexing framework, a new scoring framework and Apache Solr integration, to mention just a few.
See <a href="http://www.apache.org/dist/lucene/nutch/CHANGES-1.0.txt">
list of changes</a> made in this version. The release is available
<a href="http://lucene.apache.org/nutch/release/">here</a>.</p>
<a name="N100A3"></a><a name="09+February+2009+-+Lucene+at+ApacheCon+Europe+2009+in%0A%09%09%09Amsterdam"></a>
<h3 class="h4">09 February 2009 - Lucene at ApacheCon Europe 2009 in
Amsterdam</h3>
<p>
<a href="http://www.eu.apachecon.com/c/aceu2009/" title="ApacheCon EU 2009">
<img alt="ApacheCon EU 2009 Logo" class="float-right" src="http://www.eu.apachecon.com/page_attachments/0000/0115/125x125_basic.gif">
</a>
Lucene will be extremely well represented at
<a href="http://www.eu.apachecon.com/c/aceu2009/">ApacheCon EU 2009</a>
in Amsterdam, Netherlands this March 23-27, 2009:
</p>
<ul>
<li>
<a href="http://eu.apachecon.com/c/aceu2009/sessions/197">Lucene Boot Camp</a>
- A two day training session, March 23 &amp; 24th</li>
<li>
<a href="http://eu.apachecon.com/c/aceu2009/sessions/201">Solr Boot Camp</a> - A one day training session, March 24th</li>
<li>
<a href="http://eu.apachecon.com/c/aceu2009/sessions/136">Introducing Apache Mahout</a> - Grant Ingersoll. March 25th @ 10:30</li>
<li>
<a href="http://eu.apachecon.com/c/aceu2009/sessions/137">Lucene/Solr Case Studies</a> - Erik Hatcher. March 25th @ 11:30</li>
<li>
<a href="http://eu.apachecon.com/c/aceu2009/sessions/138">Advanced Indexing Techniques with Apache Lucene</a> - Michael Busch. March 25th @ 14:00</li>
<li>
<a href="http://eu.apachecon.com/c/aceu2009/sessions/251">Apache Solr - A Case Study</a> - Uri Boness. March 26th @ 17:30</li>
<li>
<a href="http://eu.apachecon.com/c/aceu2009/sessions/250">Best of breed - httpd, forrest, solr and droids</a> - Thorsten Scherler. March 27th @ 17:30</li>
<li>
<a href="http://eu.apachecon.com/c/aceu2009/sessions/165">Apache Droids - an intelligent standalone robot framework</a> - Thorsten Scherler. March 26th @ 15:00</li>
</ul>
<a name="N100EF"></a><a name="2+April+2007%3A+Nutch+0.9+Released"></a>
<h3 class="h4">2 April 2007: Nutch 0.9 Released</h3>
<p>The 0.9 release of Nutch is now available. This is the second release of Nutch
based entirely on the underlying Hadoop platform. This release includes several critical
bug fixes, as well as key speedups described in more detail at
<a href="http://blog.foofactory.fi/2007/03/twice-speed-half-size.html">Sami Siren's blog</a>.
See <a href="http://www.apache.org/dist/lucene/nutch/CHANGES-0.9.txt">
list of changes</a> made in this version. The release is available
<a href="http://lucene.apache.org/nutch/release/">here</a>.</p>
<a name="N10105"></a><a name="24+September+2006%3A+Nutch+0.8.1+Released"></a>
<h3 class="h4">24 September 2006: Nutch 0.8.1 Released</h3>
<p>The 0.8.1 release of Nutch is now available. This is a maintenance release for the 0.8 branch, fixing many serious bugs found in version 0.8.
See <a href="http://www.apache.org/dist/lucene/nutch/CHANGES-0.8.1.txt">
list of changes</a> made in this version. The release is available
<a href="http://lucene.apache.org/nutch/release/">here</a>.</p>
<a name="N10117"></a><a name="25+July+2006%3A+Nutch+0.8+Released"></a>
<h3 class="h4">25 July 2006: Nutch 0.8 Released</h3>
<p>The 0.8 release of Nutch is now available. This is the first release of Nutch based on
the Hadoop architecture. See <a href="http://svn.apache.org/viewvc/lucene/nutch/tags/release-0.8/CHANGES.txt?view=markup">
CHANGES.txt</a> for list of changes made in this version. The release is available
<a href="http://lucene.apache.org/nutch/release/">here</a>.</p>
<a name="N10129"></a><a name="31+March+2006%3A+Nutch+0.7.2+Released"></a>
<h3 class="h4">31 March 2006: Nutch 0.7.2 Released</h3>
<p>The 0.7.2 release of Nutch is now available. This is a bug fix release for the 0.7 branch. See
<a href="http://svn.apache.org/viewcvs.cgi/lucene/nutch/branches/branch-0.7/CHANGES.txt?rev=390158">
CHANGES.txt</a> for details. The release is available
<a href="http://lucene.apache.org/nutch/release/">here</a>.</p>
<a name="N1013B"></a><a name="1+October+2005%3A+Nutch+0.7.1+Released"></a>
<h3 class="h4">1 October 2005: Nutch 0.7.1 Released</h3>
<p>The 0.7.1 release of Nutch is now available. This is a bug fix release. See
<a href="http://svn.apache.org/viewcvs.cgi/lucene/nutch/branches/branch-0.7/CHANGES.txt?rev=292986">
CHANGES.txt</a> for details. The release is available
<a href="http://lucene.apache.org/nutch/release/">here</a>.</p>
<a name="N1014D"></a><a name="17+August+2005%3A+Nutch+0.7+Released"></a>
<h3 class="h4">17 August 2005: Nutch 0.7 Released</h3>
<p>This is the first Nutch release as an Apache Lucene sub-project. See
<a href="http://svn.apache.org/viewcvs.cgi/lucene/nutch/trunk/CHANGES.txt?rev=233150">
CHANGES.txt</a> for details. The release is available
<a href="http://lucene.apache.org/nutch/release/">here</a>.</p>
<a name="N1015F"></a><a name="June+2005%3A+Nutch+graduates+from+Incubator"></a>
<h3 class="h4">June 2005: Nutch graduates from Incubator</h3>
<p>Nutch has now graduated from the Apache Incubator and is now
a subproject of Lucene.</p>
<a name="N10169"></a><a name="January+2005%3A+Nutch+Joins+Apache+Incubator"></a>
<h3 class="h4">January 2005: Nutch Joins Apache Incubator</h3>
<p>Nutch is a two-year-old open source project, previously
hosted at Sourceforge and backed by its own non-profit
organization. The non-profit was founded in order to assign
copyright, so that we could retain the right to change the
license. We have now determined that the Apache license is the
appropriate license for Nutch and no longer require the
overhead of an independent non-profit organization. Nutch's
board of directors and its developers were both polled and
supported the move to the Apache foundation.</p>
<a name="N10173"></a><a name="September+2004%3A+Creative+Commons+launches+Nutch-based+Search"></a>
<h3 class="h4">September 2004: Creative Commons launches Nutch-based Search</h3>
<p>Creative Commons unveiled a beta version of its search
engine, which scours the web for text, images, audio, and video
free to re-use on certain terms &mdash; a search refinement offered by
no other company or organization.</p>
<p>See the <a href="http://creativecommons.org/press-releases/entry/5064">Creative
Commons Press Release</a> for more details.</p>
<a name="N10184"></a><a name="September+2004%3A+Oregon+State+University+switches+to+Nutch"></a>
<h3 class="h4">September 2004: Oregon State University switches to Nutch</h3>
<p>Oregon State University is converting its searching
infrastructure from Google&trade; to the open source project
Nutch. The effort to replace Google&trade; will realize
significant cost savings for Oregon State University, while
promoting both the Nutch Search Engine and transparency in
search engine use and management.</p>
<p>For more details see the announcement by OSU's <a href="http://osuosl.org/news_folder/nutch">Open Source
Lab</a>.</p>
</div>
</div>
<!--+
|end content
+-->
<div class="clearboth">&nbsp;</div>
</div>
<div id="footer">
<!--+
|start bottomstrip
+-->
<div class="lastmodified">
<!--+
|Writes the text "Last Published: " followed by the browser's
|document.lastModified value into the footer at this position. The script
|body is wrapped in an HTML comment, the usual way of hiding inline script
|from browsers that do not support scripting.
+-->
<script type="text/javascript"><!--
document.write("Last Published: " + document.lastModified);
// --></script>
</div>
<div class="copyright">
Copyright &copy;
2006 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
</div>
<div id="logos"></div>
<!--+
|end bottomstrip
+-->
</div>
</body>
</html>