moved MarkupParser#cleanLink( String, String ) to TextUtil.cleanString( String, String )
also moved the PUNCTUATION_CHARS_ALLOWED and LEGACY_CHARS_ALLOWED constants from MarkupParser to TextUtil. The objective of this move is to break the package / class cycles between WikiEngine and MarkupParser.
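
For illustration, a minimal sketch of calling the relocated helper. The wrapper class below is hypothetical; the TextUtil method, the constants, and the behavior shown in the comments follow from the cleanString implementation moved in this commit:

    import org.apache.wiki.util.TextUtil;

    public class CleanStringExample {
        public static void main( final String[] args ) {
            // With PUNCTUATION_CHARS_ALLOWED (" ()&+,-=._$"), spaces are allowed
            // characters: repeated whitespace is collapsed to a single space,
            // leading/trailing whitespace is trimmed, and the first letter is
            // capitalized.
            TextUtil.cleanString( "  my  page name ", TextUtil.PUNCTUATION_CHARS_ALLOWED ); // "My page name"

            // With LEGACY_CHARS_ALLOWED ("._"), spaces are disallowed, so each one
            // acts as a word boundary and is dropped, CamelCasing the result.
            TextUtil.cleanString( "my page name", TextUtil.LEGACY_CHARS_ALLOWED );          // "MyPageName"
        }
    }
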
diff --git a/jspwiki-main/src/main/java/org/apache/wiki/WikiEngine.java b/jspwiki-main/src/main/java/org/apache/wiki/WikiEngine.java
index 9b522e4..2189371 100644
--- a/jspwiki-main/src/main/java/org/apache/wiki/WikiEngine.java
+++ b/jspwiki-main/src/main/java/org/apache/wiki/WikiEngine.java
@@ -40,7 +40,6 @@
import org.apache.wiki.event.WikiPageEvent;
import org.apache.wiki.i18n.InternationalizationManager;
import org.apache.wiki.pages.PageManager;
-import org.apache.wiki.parser.MarkupParser;
import org.apache.wiki.references.ReferenceManager;
import org.apache.wiki.render.RenderingManager;
import org.apache.wiki.rss.RSSGenerator;
@@ -848,7 +847,7 @@
// FIXME: Should use servlet context as a default instead of a constant.
public String getApplicationName() {
final String appName = TextUtil.getStringProperty( m_properties, PROP_APPNAME, Release.APPNAME );
- return MarkupParser.cleanLink( appName );
+ return TextUtil.cleanString( appName, TextUtil.PUNCTUATION_CHARS_ALLOWED );
}
/**
diff --git a/jspwiki-main/src/main/java/org/apache/wiki/parser/MarkupParser.java b/jspwiki-main/src/main/java/org/apache/wiki/parser/MarkupParser.java
index 0bcdad6..4acb8a7 100644
--- a/jspwiki-main/src/main/java/org/apache/wiki/parser/MarkupParser.java
+++ b/jspwiki-main/src/main/java/org/apache/wiki/parser/MarkupParser.java
@@ -26,6 +26,7 @@
import org.apache.wiki.StringTransmutator;
import org.apache.wiki.WikiContext;
import org.apache.wiki.WikiEngine;
+import org.apache.wiki.util.TextUtil;
import org.jdom2.Element;
import java.io.BufferedReader;
@@ -80,13 +81,6 @@
/** If set to "true", all external links are tagged with 'rel="nofollow"' */
public static final String PROP_USERELNOFOLLOW = "jspwiki.translatorReader.useRelNofollow";
- /** Lists all punctuation characters allowed in WikiMarkup. These will not be cleaned away. This is for compatibility for older versions
- of JSPWiki. */
- protected static final String LEGACY_CHARS_ALLOWED = "._";
-
- /** Lists all punctuation characters allowed in page names. */
- public static final String PUNCTUATION_CHARS_ALLOWED = " ()&+,-=._$";
-
public static final String HASHLINK = "hashlink";
/** Name of the outlink image; relative path to the JSPWiki directory. */
@@ -341,7 +335,7 @@
* @since 2.0
*/
public static String cleanLink( final String link ) {
- return cleanLink( link, PUNCTUATION_CHARS_ALLOWED );
+ return TextUtil.cleanString( link, TextUtil.PUNCTUATION_CHARS_ALLOWED );
}
/**
@@ -354,62 +348,7 @@
* @since 2.6
*/
public static String wikifyLink( final String link ) {
- return cleanLink( link, LEGACY_CHARS_ALLOWED );
- }
-
- /**
- * Cleans a Wiki name based on a list of characters. Also, any multiple whitespace is collapsed into a single space, and any
- * leading or trailing space is removed.
- *
- * @param link Link to be cleared. Null is safe, and causes this to return null.
- * @param allowedChars Characters which are allowed in the string.
- * @return A cleaned link.
- *
- * @since 2.6
- */
- public static String cleanLink( String link, final String allowedChars ) {
- if( link == null ) {
- return null;
- }
-
- link = link.trim();
- final StringBuilder clean = new StringBuilder( link.length() );
-
- // Remove non-alphanumeric characters that should not be put inside WikiNames. Note that all valid Unicode letters are
- // considered okay for WikiNames. It is the problem of the WikiPageProvider to take care of actually storing that information.
- //
- // Also capitalize things, if necessary.
-
- boolean isWord = true; // If true, we've just crossed a word boundary
- boolean wasSpace = false;
- for( int i = 0; i < link.length(); i++ ) {
- char ch = link.charAt(i);
-
- // Cleans away repetitive whitespace and only uses the first one.
- if( Character.isWhitespace(ch) ) {
- if( wasSpace ) {
- continue;
- }
-
- wasSpace = true;
- } else {
- wasSpace = false;
- }
-
- // Check if it is allowed to use this char, and capitalize, if necessary.
- if( Character.isLetterOrDigit( ch ) || allowedChars.indexOf( ch ) != -1 ) {
- // Is a letter
- if( isWord ) {
- ch = Character.toUpperCase( ch );
- }
- clean.append( ch );
- isWord = false;
- } else {
- isWord = true;
- }
- }
-
- return clean.toString();
+ return TextUtil.cleanString( link, TextUtil.LEGACY_CHARS_ALLOWED );
}
}
diff --git a/jspwiki-main/src/main/java/org/apache/wiki/search/LuceneSearchProvider.java b/jspwiki-main/src/main/java/org/apache/wiki/search/LuceneSearchProvider.java
index e6e4711..1cfcc30 100644
--- a/jspwiki-main/src/main/java/org/apache/wiki/search/LuceneSearchProvider.java
+++ b/jspwiki-main/src/main/java/org/apache/wiki/search/LuceneSearchProvider.java
@@ -59,7 +59,6 @@
import org.apache.wiki.attachment.AttachmentManager;
import org.apache.wiki.auth.AuthorizationManager;
import org.apache.wiki.auth.permissions.PagePermission;
-import org.apache.wiki.parser.MarkupParser;
import org.apache.wiki.providers.WikiPageProvider;
import org.apache.wiki.util.ClassUtil;
import org.apache.wiki.util.FileUtil;
@@ -98,13 +97,12 @@
/** Which analyzer to use. Default is StandardAnalyzer. */
public static final String PROP_LUCENE_ANALYZER = "jspwiki.lucene.analyzer";
-
private static final String PROP_LUCENE_INDEXDELAY = "jspwiki.lucene.indexdelay";
private static final String PROP_LUCENE_INITIALDELAY = "jspwiki.lucene.initialdelay";
private String m_analyzerClass = "org.apache.lucene.analysis.standard.ClassicAnalyzer";
- private static final String LUCENE_DIR = "lucene";
+ private static final String LUCENE_DIR = "lucene";
/** These attachment file suffixes will be indexed. */
public static final String[] SEARCHABLE_FILE_SUFFIXES = new String[] { ".txt", ".ini", ".xml", ".html", "htm", ".mm", ".htm",
@@ -118,8 +116,8 @@
protected static final String LUCENE_PAGE_NAME = "name";
protected static final String LUCENE_PAGE_KEYWORDS = "keywords";
- private String m_luceneDirectory;
- protected List<Object[]> m_updates = Collections.synchronizedList( new ArrayList<>() );
+ private String m_luceneDirectory;
+ protected final List< Object[] > m_updates = Collections.synchronizedList( new ArrayList<>() );
/** Maximum number of fragments from search matches. */
private static final int MAX_FRAGMENTS = 3;
@@ -127,60 +125,49 @@
/** The maximum number of hits to return from searches. */
public static final int MAX_SEARCH_HITS = 99_999;
- private static String c_punctuationSpaces = StringUtils.repeat(" ", MarkupParser.PUNCTUATION_CHARS_ALLOWED.length() );
+ private static String c_punctuationSpaces = StringUtils.repeat(" ", TextUtil.PUNCTUATION_CHARS_ALLOWED.length() );
/**
* {@inheritDoc}
*/
@Override
- public void initialize(WikiEngine engine, Properties props)
- throws NoRequiredPropertyException, IOException
- {
+ public void initialize( final WikiEngine engine, final Properties props ) throws NoRequiredPropertyException, IOException {
m_engine = engine;
searchExecutor = Executors.newCachedThreadPool();
m_luceneDirectory = engine.getWorkDir()+File.separator+LUCENE_DIR;
- int initialDelay = TextUtil.getIntegerProperty( props, PROP_LUCENE_INITIALDELAY, LuceneUpdater.INITIAL_DELAY );
- int indexDelay = TextUtil.getIntegerProperty( props, PROP_LUCENE_INDEXDELAY, LuceneUpdater.INDEX_DELAY );
+ final int initialDelay = TextUtil.getIntegerProperty( props, PROP_LUCENE_INITIALDELAY, LuceneUpdater.INITIAL_DELAY );
+ final int indexDelay = TextUtil.getIntegerProperty( props, PROP_LUCENE_INDEXDELAY, LuceneUpdater.INDEX_DELAY );
m_analyzerClass = TextUtil.getStringProperty( props, PROP_LUCENE_ANALYZER, m_analyzerClass );
// FIXME: Just to be simple for now, we will do full reindex
// only if no files are in lucene directory.
- File dir = new File(m_luceneDirectory);
-
+ final File dir = new File(m_luceneDirectory);
log.info("Lucene enabled, cache will be in: "+dir.getAbsolutePath());
-
- try
- {
- if( !dir.exists() )
- {
+ try {
+ if( !dir.exists() ) {
dir.mkdirs();
}
- if( !dir.exists() || !dir.canWrite() || !dir.canRead() )
- {
+ if( !dir.exists() || !dir.canWrite() || !dir.canRead() ) {
log.error("Cannot write to Lucene directory, disabling Lucene: "+dir.getAbsolutePath());
throw new IOException( "Invalid Lucene directory." );
}
- String[] filelist = dir.list();
-
- if( filelist == null )
- {
+ final String[] filelist = dir.list();
+ if( filelist == null ) {
throw new IOException( "Invalid Lucene directory: cannot produce listing: "+dir.getAbsolutePath());
}
- }
- catch ( IOException e )
- {
+ } catch( final IOException e ) {
log.error("Problem while creating Lucene index - not using Lucene.", e);
}
// Start the Lucene update thread, which waits first
// for a little while before starting to go through
// the Lucene "pages that need updating".
- LuceneUpdater updater = new LuceneUpdater( m_engine, this, initialDelay, indexDelay );
+ final LuceneUpdater updater = new LuceneUpdater( m_engine, this, initialDelay, indexDelay );
updater.start();
}
@@ -200,10 +187,8 @@
* @throws IOException If there's a problem during indexing
*/
protected void doFullLuceneReindex() throws IOException {
- File dir = new File(m_luceneDirectory);
-
- String[] filelist = dir.list();
-
+ final File dir = new File(m_luceneDirectory);
+ final String[] filelist = dir.list();
if( filelist == null ) {
throw new IOException( "Invalid Lucene directory: cannot produce listing: "+dir.getAbsolutePath());
}
@@ -213,47 +198,45 @@
//
// No files? Reindex!
//
- Date start = new Date();
+ final Date start = new Date();
log.info("Starting Lucene reindexing, this can take a couple of minutes...");
- Directory luceneDir = new SimpleFSDirectory( dir.toPath() );
- try( IndexWriter writer = getIndexWriter( luceneDir ) )
- {
- Collection< WikiPage > allPages = m_engine.getPageManager().getAllPages();
- for( WikiPage page : allPages ) {
-
+ final Directory luceneDir = new SimpleFSDirectory( dir.toPath() );
+ try( final IndexWriter writer = getIndexWriter( luceneDir ) ) {
+ final Collection< WikiPage > allPages = m_engine.getPageManager().getAllPages();
+ for( final WikiPage page : allPages ) {
try {
- String text = m_engine.getPageManager().getPageText( page.getName(), WikiProvider.LATEST_VERSION );
+ final String text = m_engine.getPageManager().getPageText( page.getName(), WikiProvider.LATEST_VERSION );
luceneIndexPage( page, text, writer );
- } catch( IOException e ) {
+ } catch( final IOException e ) {
log.warn( "Unable to index page " + page.getName() + ", continuing to next ", e );
}
}
- Collection< Attachment > allAttachments = m_engine.getAttachmentManager().getAllAttachments();
- for( Attachment att : allAttachments ) {
+ final Collection< Attachment > allAttachments = m_engine.getAttachmentManager().getAllAttachments();
+ for( final Attachment att : allAttachments ) {
try {
- String text = getAttachmentContent( att.getName(), WikiProvider.LATEST_VERSION );
+ final String text = getAttachmentContent( att.getName(), WikiProvider.LATEST_VERSION );
luceneIndexPage( att, text, writer );
- } catch( IOException e ) {
+ } catch( final IOException e ) {
log.warn( "Unable to index attachment " + att.getName() + ", continuing to next", e );
}
}
}
- Date end = new Date();
+ final Date end = new Date();
log.info( "Full Lucene index finished in " + (end.getTime() - start.getTime()) + " milliseconds." );
} else {
log.info("Files found in Lucene directory, not reindexing.");
}
- } catch ( IOException e ) {
+ } catch ( final IOException e ) {
log.error("Problem while creating Lucene index - not using Lucene.", e);
- } catch ( ProviderException e ) {
+ } catch ( final ProviderException e ) {
log.error("Problem reading pages while creating Lucene index (JSPWiki won't start.)", e);
throw new IllegalArgumentException("unable to create Lucene index");
- } catch( Exception e ) {
+ } catch( final Exception e ) {
log.error("Unable to start lucene",e);
}
@@ -268,24 +251,17 @@
*
* @return the content of the Attachment as a String.
*/
- protected String getAttachmentContent( String attachmentName, int version )
- {
- AttachmentManager mgr = m_engine.getAttachmentManager();
-
- try
- {
- Attachment att = mgr.getAttachmentInfo( attachmentName, version );
+ protected String getAttachmentContent( final String attachmentName, final int version ) {
+ final AttachmentManager mgr = m_engine.getAttachmentManager();
+ try {
+ final Attachment att = mgr.getAttachmentInfo( attachmentName, version );
//FIXME: Find out why sometimes att is null
- if(att != null)
- {
+ if( att != null ) {
return getAttachmentContent( att );
}
- }
- catch (ProviderException e)
- {
+ } catch( final ProviderException e ) {
log.error("Attachment cannot be loaded", e);
}
- // Something was wrong, no result is returned.
return null;
}
@@ -295,29 +271,26 @@
* FIXME This is a very simple implementation of some text-based attachment, mainly used for testing.
* This should be replaced /moved to Attachment search providers or some other 'pluggable' way to search attachments
*/
- protected String getAttachmentContent( Attachment att )
- {
- AttachmentManager mgr = m_engine.getAttachmentManager();
+ protected String getAttachmentContent( final Attachment att ) {
+ final AttachmentManager mgr = m_engine.getAttachmentManager();
//FIXME: Add attachment plugin structure
- String filename = att.getFileName();
+ final String filename = att.getFileName();
boolean searchSuffix = false;
- for( String suffix : SEARCHABLE_FILE_SUFFIXES )
- {
- if( filename.endsWith( suffix ) )
- {
+ for( final String suffix : SEARCHABLE_FILE_SUFFIXES ) {
+ if( filename.endsWith( suffix ) ) {
searchSuffix = true;
+ break;
}
}
String out = filename;
- if( searchSuffix )
- {
+ if( searchSuffix ) {
try( final InputStream attStream = mgr.getAttachmentStream( att ); final StringWriter sout = new StringWriter() ) {
FileUtil.copyContents( new InputStreamReader( attStream ), sout );
out = out + " " + sout.toString();
- } catch( ProviderException | IOException e ) {
+ } catch( final ProviderException | IOException e ) {
log.error("Attachment cannot be loaded", e);
}
}
@@ -350,19 +323,13 @@
log.debug("Done updating Lucene index for page '" + page.getName() + "'.");
}
-
- private Analyzer getLuceneAnalyzer() throws ProviderException
- {
- try
- {
- Class< ? > clazz = ClassUtil.findClass( "", m_analyzerClass );
- Constructor< ? > constructor = clazz.getConstructor();
- Analyzer analyzer = (Analyzer) constructor.newInstance();
- return analyzer;
- }
- catch( Exception e )
- {
- String msg = "Could not get LuceneAnalyzer class " + m_analyzerClass + ", reason: ";
+ private Analyzer getLuceneAnalyzer() throws ProviderException {
+ try {
+ final Class< ? > clazz = ClassUtil.findClass( "", m_analyzerClass );
+ final Constructor< ? > constructor = clazz.getConstructor();
+ return ( Analyzer )constructor.newInstance();
+ } catch( final Exception e ) {
+ final String msg = "Could not get LuceneAnalyzer class " + m_analyzerClass + ", reason: ";
log.error( msg, e );
throw new ProviderException( msg + e );
}
@@ -399,13 +366,8 @@
doc.add( field );
// Allow searching by page name. Both beautified and raw
- final String unTokenizedTitle = StringUtils.replaceChars( page.getName(),
- MarkupParser.PUNCTUATION_CHARS_ALLOWED,
- c_punctuationSpaces );
-
- field = new Field( LUCENE_PAGE_NAME,
- TextUtil.beautifyString( page.getName() ) + " " + unTokenizedTitle,
- TextField.TYPE_STORED );
+ final String unTokenizedTitle = StringUtils.replaceChars( page.getName(), TextUtil.PUNCTUATION_CHARS_ALLOWED, c_punctuationSpaces );
+ field = new Field( LUCENE_PAGE_NAME, TextUtil.beautifyString( page.getName() ) + " " + unTokenizedTitle, TextField.TYPE_STORED );
doc.add( field );
// Allow searching by authorname
@@ -446,7 +408,7 @@
@Override
public void pageRemoved( final WikiPage page ) {
try( final Directory luceneDir = new SimpleFSDirectory( new File( m_luceneDirectory ).toPath() );
- final IndexWriter writer = getIndexWriter( luceneDir ); ) {
+ final IndexWriter writer = getIndexWriter( luceneDir ) ) {
final Query query = new TermQuery( new Term( LUCENE_ID, page.getName() ) );
writer.deleteDocuments( query );
} catch ( final Exception e ) {
@@ -454,13 +416,10 @@
}
}
- IndexWriter getIndexWriter( Directory luceneDir ) throws IOException, ProviderException {
- IndexWriterConfig writerConfig = new IndexWriterConfig( getLuceneAnalyzer() );
+ IndexWriter getIndexWriter(final Directory luceneDir ) throws IOException, ProviderException {
+ final IndexWriterConfig writerConfig = new IndexWriterConfig( getLuceneAnalyzer() );
writerConfig.setOpenMode( OpenMode.CREATE_OR_APPEND );
- IndexWriter writer = new IndexWriter( luceneDir, writerConfig );
-
- // writer.setInfoStream( System.out );
- return writer;
+ return new IndexWriter( luceneDir, writerConfig );
}
/**
@@ -469,12 +428,11 @@
* @param page WikiPage to add to the update queue.
*/
@Override
- public void reindexPage( WikiPage page ) {
+ public void reindexPage( final WikiPage page ) {
if( page != null ) {
- String text;
+ final String text;
// TODO: Think if this was better done in the thread itself?
-
if( page instanceof Attachment ) {
text = getAttachmentContent( (Attachment) page );
} else {
@@ -483,10 +441,10 @@
if( text != null ) {
// Add work item to m_updates queue.
- Object[] pair = new Object[2];
+ final Object[] pair = new Object[2];
pair[0] = page;
pair[1] = text;
- m_updates.add(pair);
+ m_updates.add( pair );
log.debug("Scheduling page " + page.getName() + " for index update");
}
}
@@ -496,7 +454,7 @@
* {@inheritDoc}
*/
@Override
- public Collection< SearchResult > findPages( String query, WikiContext wikiContext ) throws ProviderException {
+ public Collection< SearchResult > findPages( final String query, final WikiContext wikiContext ) throws ProviderException {
return findPages( query, FLAG_CONTEXTS, wikiContext );
}
@@ -528,44 +486,40 @@
if( (flags & FLAG_CONTEXTS) != 0 ) {
highlighter = new Highlighter(new SimpleHTMLFormatter("<span class=\"searchmatch\">", "</span>"),
new SimpleHTMLEncoder(),
- new QueryScorer(luceneQuery));
+ new QueryScorer( luceneQuery ) );
}
final ScoreDoc[] hits = searcher.search(luceneQuery, MAX_SEARCH_HITS).scoreDocs;
final AuthorizationManager mgr = m_engine.getAuthorizationManager();
list = new ArrayList<>(hits.length);
- for ( int curr = 0; curr < hits.length; curr++ ) {
- int docID = hits[curr].doc;
- Document doc = searcher.doc( docID );
- String pageName = doc.get(LUCENE_ID);
- WikiPage page = m_engine.getPageManager().getPage(pageName, WikiPageProvider.LATEST_VERSION);
+ for( final ScoreDoc hit : hits ) {
+ final int docID = hit.doc;
+ final Document doc = searcher.doc( docID );
+ final String pageName = doc.get( LUCENE_ID );
+ final WikiPage page = m_engine.getPageManager().getPage( pageName, WikiPageProvider.LATEST_VERSION );
if( page != null ) {
- if( page instanceof Attachment ) {
- // Currently attachments don't look nice on the search-results page
- // When the search-results are cleaned up this can be enabled again.
- }
-
final PagePermission pp = new PagePermission( page, PagePermission.VIEW_ACTION );
- if( mgr.checkPermission( wikiContext.getWikiSession(), pp ) ) {
- final int score = (int)(hits[curr].score * 100);
+ if( mgr.checkPermission( wikiContext.getWikiSession(), pp ) ) {
+ final int score = ( int )( hit.score * 100 );
- // Get highlighted search contexts
- final String text = doc.get(LUCENE_PAGE_CONTENTS);
-
- String[] fragments = new String[0];
- if( text != null && highlighter != null ) {
- TokenStream tokenStream = getLuceneAnalyzer().tokenStream(LUCENE_PAGE_CONTENTS, new StringReader(text));
- fragments = highlighter.getBestFragments(tokenStream, text, MAX_FRAGMENTS);
- }
+ // Get highlighted search contexts
+ final String text = doc.get( LUCENE_PAGE_CONTENTS );
+
+ String[] fragments = new String[ 0 ];
+ if( text != null && highlighter != null ) {
+ final TokenStream tokenStream = getLuceneAnalyzer()
+ .tokenStream( LUCENE_PAGE_CONTENTS, new StringReader( text ) );
+ fragments = highlighter.getBestFragments( tokenStream, text, MAX_FRAGMENTS );
+ }
final SearchResult result = new SearchResultImpl( page, score, fragments );
- list.add(result);
- }
+ list.add( result );
+ }
} else {
- log.error("Lucene found a result page '" + pageName + "' that could not be loaded, removing from Lucene cache");
- pageRemoved(new WikiPage( m_engine, pageName ));
+ log.error( "Lucene found a result page '" + pageName + "' that could not be loaded, removing from Lucene cache" );
+ pageRemoved( new WikiPage( m_engine, pageName ) );
}
}
} catch( final IOException e ) {
@@ -592,8 +546,7 @@
/**
* Updater thread that updates Lucene indexes.
*/
- private static final class LuceneUpdater extends WikiBackgroundThread
- {
+ private static final class LuceneUpdater extends WikiBackgroundThread {
protected static final int INDEX_DELAY = 5;
protected static final int INITIAL_DELAY = 60;
private final LuceneSearchProvider m_provider;
@@ -602,47 +555,39 @@
private WatchDog m_watchdog;
- private LuceneUpdater( WikiEngine engine, LuceneSearchProvider provider,
- int initialDelay, int indexDelay )
- {
+ private LuceneUpdater( final WikiEngine engine, final LuceneSearchProvider provider, final int initialDelay, final int indexDelay ) {
super( engine, indexDelay );
m_provider = provider;
+ m_initialDelay = initialDelay;
setName("JSPWiki Lucene Indexer");
}
@Override
- public void startupTask() throws Exception
- {
+ public void startupTask() throws Exception {
m_watchdog = getEngine().getCurrentWatchDog();
// Sleep initially...
- try
- {
+ try {
Thread.sleep( m_initialDelay * 1000L );
- }
- catch( InterruptedException e )
- {
+ } catch( final InterruptedException e ) {
throw new InternalWikiException("Interrupted while waiting to start.", e);
}
- m_watchdog.enterState("Full reindex");
+ m_watchdog.enterState( "Full reindex" );
// Reindex everything
m_provider.doFullLuceneReindex();
m_watchdog.exitState();
}
@Override
- public void backgroundTask() throws Exception
- {
+ public void backgroundTask() {
m_watchdog.enterState("Emptying index queue", 60);
- synchronized ( m_provider.m_updates )
- {
- while( m_provider.m_updates.size() > 0 )
- {
- Object[] pair = m_provider.m_updates.remove(0);
- WikiPage page = ( WikiPage ) pair[0];
- String text = ( String ) pair[1];
+ synchronized ( m_provider.m_updates ) {
+ while( m_provider.m_updates.size() > 0 ) {
+ final Object[] pair = m_provider.m_updates.remove(0);
+ final WikiPage page = ( WikiPage ) pair[0];
+ final String text = ( String ) pair[1];
m_provider.updateLuceneIndex(page, text);
}
}
@@ -653,16 +598,14 @@
}
// FIXME: This class is dumb; needs to have a better implementation
- private static class SearchResultImpl
- implements SearchResult
- {
+ private static class SearchResultImpl implements SearchResult {
+
private WikiPage m_page;
private int m_score;
private String[] m_contexts;
- public SearchResultImpl( WikiPage page, int score, String[] contexts )
- {
- m_page = page;
+ public SearchResultImpl( final WikiPage page, final int score, final String[] contexts ) {
+ m_page = page;
m_score = score;
m_contexts = contexts != null ? contexts.clone() : null;
}
@@ -689,4 +632,5 @@
return m_contexts;
}
}
+
}
diff --git a/jspwiki-util/src/main/java/org/apache/wiki/util/TextUtil.java b/jspwiki-util/src/main/java/org/apache/wiki/util/TextUtil.java
index 31bf8a1..b50fee6 100644
--- a/jspwiki-util/src/main/java/org/apache/wiki/util/TextUtil.java
+++ b/jspwiki-util/src/main/java/org/apache/wiki/util/TextUtil.java
@@ -43,6 +43,13 @@
/** Length of password. {@link #generateRandomPassword() */
public static final int PASSWORD_LENGTH = 8;
+ /** Lists all punctuation characters allowed in WikiMarkup. These will not be cleaned away. This is for compatibility for older versions
+ of JSPWiki. */
+ public static final String LEGACY_CHARS_ALLOWED = "._";
+
+ /** Lists all punctuation characters allowed in page names. */
+ public static final String PUNCTUATION_CHARS_ALLOWED = " ()&+,-=._$";
+
/** Private constructor prevents instantiation. */
private TextUtil() {}
@@ -579,6 +586,61 @@
}
/**
+ * Cleans a Wiki name based on a list of characters. Also, any multiple whitespace is collapsed into a single space, and any
+ * leading or trailing space is removed.
+ *
+ * @param text text to be cleared. Null is safe, and causes this to return null.
+ * @param allowedChars Characters which are allowed in the string.
+ * @return A cleaned text.
+ *
+ * @since 2.6
+ */
+ public static String cleanString( String text, final String allowedChars ) {
+ if( text == null ) {
+ return null;
+ }
+
+ text = text.trim();
+ final StringBuilder clean = new StringBuilder( text.length() );
+
+ // Remove non-alphanumeric characters that should not be put inside WikiNames. Note that all valid Unicode letters are
+ // considered okay for WikiNames. It is the problem of the WikiPageProvider to take care of actually storing that information.
+ //
+ // Also capitalize things, if necessary.
+
+ boolean isWord = true; // If true, we've just crossed a word boundary
+ boolean wasSpace = false;
+ for( int i = 0; i < text.length(); i++ ) {
+ char ch = text.charAt( i );
+
+ // Cleans away repetitive whitespace and only uses the first one.
+ if( Character.isWhitespace( ch ) ) {
+ if( wasSpace ) {
+ continue;
+ }
+
+ wasSpace = true;
+ } else {
+ wasSpace = false;
+ }
+
+ // Check if it is allowed to use this char, and capitalize, if necessary.
+ if( Character.isLetterOrDigit( ch ) || allowedChars.indexOf( ch ) != -1 ) {
+ // Is a letter
+ if( isWord ) {
+ ch = Character.toUpperCase( ch );
+ }
+ clean.append( ch );
+ isWord = false;
+ } else {
+ isWord = true;
+ }
+ }
+
+ return clean.toString();
+ }
+
+ /**
* Creates a Properties object based on an array which contains alternatively a key and a value. It is useful
* for generating default mappings. For example:
* <pre>
@@ -720,11 +782,11 @@
* @return A String representation
* @since 2.3.87
*/
- public static String toHexString( byte[] bytes ) {
+ public static String toHexString( final byte[] bytes ) {
final StringBuilder sb = new StringBuilder( bytes.length * 2 );
- for( int i = 0; i < bytes.length; i++ ) {
- sb.append( toHex( bytes[i] >> 4 ) );
- sb.append( toHex( bytes[i] ) );
+ for( final byte aByte : bytes ) {
+ sb.append( toHex( aByte >> 4 ) );
+ sb.append( toHex( aByte ) );
}
return sb.toString();
@@ -766,7 +828,7 @@
public static String generateRandomPassword() {
String pw = "";
for( int i = 0; i < PASSWORD_LENGTH; i++ ) {
- int index = ( int )( RANDOM.nextDouble() * PWD_BASE.length() );
+ final int index = ( int )( RANDOM.nextDouble() * PWD_BASE.length() );
pw += PWD_BASE.substring( index, index + 1 );
}
return pw;