[#6346] Add a 'max items in solr index batch' option

Signed-off-by: Tim Van Steenburgh <tvansteenburgh@gmail.com>
diff --git a/Allura/allura/command/show_models.py b/Allura/allura/command/show_models.py
index 68a55ac..29d2091 100644
--- a/Allura/allura/command/show_models.py
+++ b/Allura/allura/command/show_models.py
@@ -70,6 +70,8 @@
                            'which are needed for some markdown macros to run properly')
     parser.add_option('--solr-hosts', dest='solr_hosts',
                       help='Override the solr host(s) to post to.  Comma-separated list of solr server URLs')
+    parser.add_option('--max-chunk', dest='max_chunk', type=int, default=100*1000,
+                      help='Max number of artifacts to index in one Solr update command')
 
     def command(self):
         from allura import model as M
@@ -90,11 +92,6 @@
         if not self.options.solr and not self.options.refs:
             self.options.solr = self.options.refs = True
 
-        if self.options.solr_hosts:
-            self.add_artifact_kwargs = {'solr_hosts': self.options.solr_hosts.split(',')}
-        else:
-            self.add_artifact_kwargs = {}
-
         for projects in utils.chunked_find(M.Project, q_project):
             for p in projects:
                 c.project = p
@@ -126,13 +123,7 @@
                     M.main_orm_session.flush()
                     M.artifact_orm_session.clear()
                     try:
-                        if self.options.tasks:
-                            self._chunked_add_artifacts(ref_ids)
-                        else:
-                            add_artifacts(ref_ids,
-                                          update_solr=self.options.solr,
-                                          update_refs=self.options.refs,
-                                          **self.add_artifact_kwargs)
+                        self._chunked_add_artifacts(ref_ids)
                     except CompoundError, err:
                         base.log.exception('Error indexing artifacts:\n%r', err)
                         base.log.error('%s', err.format_error())
@@ -140,12 +131,24 @@
                     M.main_orm_session.clear()
         base.log.info('Reindex %s', 'queued' if self.options.tasks else 'done')
 
+    @property
+    def add_artifact_kwargs(self):  # extra kwargs passed to add_artifacts()
+        if self.options.solr_hosts:
+            return {'solr_hosts': self.options.solr_hosts.split(',')}
+        return {}
+
     def _chunked_add_artifacts(self, ref_ids):
         # ref_ids contains solr index ids which can easily be over
         # 100 bytes. Here we allow for 160 bytes avg, plus
         # room for other document overhead.
-        for chunk in utils.chunked_list(ref_ids, 100 * 1000):
-            self._post_add_artifacts(chunk)
+        for chunk in utils.chunked_list(ref_ids, self.options.max_chunk):
+            if self.options.tasks:
+                self._post_add_artifacts(chunk)
+            else:
+                add_artifacts(chunk,
+                              update_solr=self.options.solr,
+                              update_refs=self.options.refs,
+                              **self.add_artifact_kwargs)
 
     def _post_add_artifacts(self, chunk):
         """
diff --git a/Allura/allura/tests/test_commands.py b/Allura/allura/tests/test_commands.py
index 1d72ff6..5545466 100644
--- a/Allura/allura/tests/test_commands.py
+++ b/Allura/allura/tests/test_commands.py
@@ -365,13 +365,17 @@
     @patch('pysolr.Solr')
     def test_solr_hosts_1(self, Solr):
         cmd = show_models.ReindexCommand('reindex')
-        cmd.run([test_config, '-p', 'test', '--solr', '--solr-hosts=http://blah.com/solr/forge'])
+        cmd.options, args = cmd.parser.parse_args([
+            '-p', 'test', '--solr', '--solr-hosts=http://blah.com/solr/forge'])
+        cmd._chunked_add_artifacts(list(range(10)))
         assert_equal(Solr.call_args[0][0], 'http://blah.com/solr/forge')
 
     @patch('pysolr.Solr')
     def test_solr_hosts_list(self, Solr):
         cmd = show_models.ReindexCommand('reindex')
-        cmd.run([test_config, '-p', 'test', '--solr', '--solr-hosts=http://blah.com/solr/forge,https://other.net/solr/forge'])
+        cmd.options, args = cmd.parser.parse_args([
+            '-p', 'test', '--solr', '--solr-hosts=http://blah.com/solr/forge,https://other.net/solr/forge'])
+        cmd._chunked_add_artifacts(list(range(10)))
         # check constructors of first and second Solr() instantiations
         assert_equal(set([Solr.call_args_list[0][0][0], Solr.call_args_list[1][0][0]]),
                      set(['http://blah.com/solr/forge', 'https://other.net/solr/forge'])
@@ -387,13 +391,12 @@
     @patch('allura.command.show_models.add_artifacts')
     def test_chunked_add_artifacts(self, add_artifacts):
         cmd = show_models.ReindexCommand('reindex')
-        cmd.options = Mock()
-        cmd.add_artifact_kwargs = {}
-        ref_ids = list(range(100 * 1000 * 2 + 20))
+        cmd.options = Mock(tasks=True, max_chunk=10*1000)
+        ref_ids = list(range(10 * 1000 * 2 + 20))
         cmd._chunked_add_artifacts(ref_ids)
         assert_equal(len(add_artifacts.post.call_args_list), 3)
-        assert_equal(len(add_artifacts.post.call_args_list[0][0][0]), 100 * 1000)
-        assert_equal(len(add_artifacts.post.call_args_list[1][0][0]), 100 * 1000)
+        assert_equal(len(add_artifacts.post.call_args_list[0][0][0]), 10 * 1000)
+        assert_equal(len(add_artifacts.post.call_args_list[1][0][0]), 10 * 1000)
         assert_equal(len(add_artifacts.post.call_args_list[2][0][0]), 20)
 
     @patch('allura.command.show_models.add_artifacts')
@@ -404,8 +407,7 @@
                         "BSON document too large (16906035 bytes) - the connected server supports BSON document sizes up to 16777216 bytes.")
         add_artifacts.post.side_effect = on_post
         cmd = show_models.ReindexCommand('reindex')
-        cmd.options = Mock()
-        cmd.add_artifact_kwargs = {}
+        cmd.options, args = cmd.parser.parse_args([])
         cmd._post_add_artifacts(range(5))
         kw = {'update_solr': cmd.options.solr, 'update_refs': cmd.options.refs}
         expected = [
@@ -428,6 +430,5 @@
         add_artifacts.post.side_effect = on_post
         cmd = show_models.ReindexCommand('reindex')
         cmd.options = Mock()
-        cmd.add_artifact_kwargs = {}
         with td.raises(pymongo.errors.InvalidDocument):
             cmd._post_add_artifacts(range(5))