# blob: d2a69bced85d2b0c6ced79569a18955c4019085d [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import time
import VirtualBrowser
import ConnectorHelpers
from sqatools import LicenseMakerClient
from sqatools import appliance
from threading import Thread
import MetaCartaVersion
def copy_folder( source, target ):
    """ Copy a folder to a (new) area.

    Issues "mkdir -p <target>" followed by "cp -r <source> <target>", both
    through appliance.spcall.
    """
    make_target_cmd = [ "mkdir", "-p", target ]
    recursive_copy_cmd = [ "cp", "-r", source, target ]
    # Same order as before: the target must exist before anything is copied.
    for command in ( make_target_cmd, recursive_copy_cmd ):
        appliance.spcall( command )
def delete_folder( target ):
    """ Remove a folder.

    Issues "rm -rf <target>" through appliance.spcall.
    """
    removal_cmd = [ "rm", "-rf", target ]
    appliance.spcall( removal_cmd )
# Look for maintenance message displayed in UI
def check_for_maintenance_message_ui( username, password ):
    """ See if the maintenance message shows up in the crawler UI.

    Opens a VirtualBrowser session with the given username and password,
    loads the crawler main page, follows the "Manage jobs" link and looks
    for the maintenance text in the window that results.

    Returns True when the text is found, False otherwise.
    """
    # Set up virtual browser instance
    vb = VirtualBrowser.VirtualBrowser( username=username, password=password )

    # First, go to main page
    vb.load_main_window( "http://localhost/crawler/index.jsp" )

    # Find the link for job management and click it
    window = vb.find_window("")
    link = window.find_link("Manage jobs")
    # NOTE(review): the click was absent from this copy of the file and is
    # restored from the comment above - confirm the link object exposes click().
    link.click( )

    # Grab the new window
    window = vb.find_window("")

    # Use the built-in function to look for a match.
    # NOTE(review): assumes find_match raises when the text is not present;
    # confirm against VirtualBrowser before relying on the False result.
    try:
        window.find_match( "unavailable due to maintenance operations" )
    except Exception:
        return False
    return True
# Crawl user credentials (the account name and its password use the same value)
username, password = "testingest", "testingest"
# Best-effort cleanup routine: each step has its own handler, and a handler only
# prints (when print_errors is true) instead of re-raising.
# NOTE(review): this copy of the function has lost its indentation and every
# "try:" line; several guarded calls are missing as well (flagged below).
# Restore them from version control before running.
def preclean( print_errors=True ):
''' Clean up everything we might have done during the execution of this test.
This will include all jobs and ingested documents. '''
# NOTE(review): the guarded call is missing here; per the message it restarted ingestion.
except Exception, e:
if print_errors:
print "Error restarting ingestion"
print e
# NOTE(review): the guarded call is missing here; per the message it reset all jobs.
except Exception, e:
if print_errors:
print "Error resetting all jobs"
print e
# Remove test documents first
for folder in [ "/common/crawlarea" ]:
# NOTE(review): no "try:" precedes this call in the visible code.
delete_folder( folder )
except Exception, e:
if print_errors:
print "Error removing %s" % folder
print e
# NOTE(review): the guarded call is missing here; per the message it revoked a license.
except Exception, e:
if print_errors:
print "Error revoking license"
print e
# Drop the crawler account named by the module-level username.
ConnectorHelpers.delete_crawler_user( username )
except Exception, e:
if print_errors:
print "Error removing crawler user"
print e
# Final step: tear down the connector environment.
ConnectorHelpers.teardown_connector_environment( )
except Exception, e:
if print_errors:
print "Error cleaning up debs"
print e
# Thread subclass whose run() records the text of any exception by appending it
# to the object handed to the constructor.
class run_maintenance_thread(Thread):
# response: container supplied by the caller; run() calls response.append(...) on
# it, so it needs an append method (a list works, a str does not).
def __init__ (self, response):
# NOTE(review): no call to Thread.__init__ is visible in this copy; threading.Thread
# requires it before start() - confirm it was not lost with the other missing lines.
self.response = response
def run(self):
# NOTE(review): the "try:" line and its body (the maintenance operation itself) are
# missing from this copy of the file; only the handler survives.
except Exception, e:
# Record the failure text so the caller can inspect it after the thread ends.
self.response.append( str(e) )
# Main
if __name__ == '__main__':
print "Precleaning!"
# Start from a clean slate; cleanup failures are not printed on this pass.
preclean( print_errors=False )
print "Setup Connector Environment."
ConnectorHelpers.create_crawler_user( username, password )
ConnectorHelpers.define_gts_outputconnection( )
print "Setting up file area."
# We need at least 10,000 documents. We'll get this by
# having 10 documents under 4 levels of a hierarchy
# The four counters below enumerate every /common/crawlarea/<0-9>/<0-9>/<0-9>/<0-9> path.
level0 = 0
while level0 < 10:
level1 = 0
while level1 < 10:
level2 = 0
while level2 < 10:
level3 = 0
while level3 < 10:
pathname = "/common/crawlarea/%d/%d/%d/%d" % (level0,level1,level2,level3)
# NOTE(review): pathname is computed but never used in the visible code; the step that
# populates each leaf folder (presumably a copy_folder call) appears to be missing.
level3 += 1
level2 += 1
level1 += 1
level0 += 1
# PHASE 1: Ingestion
print "Ingestion Load Test."
# Define repository connection
ConnectorHelpers.define_repositoryconnection( "FileSystem",
"FileSystem Connection",
"com.metacarta.crawler.connectors.filesystem.FileConnector" )
# Define job
# The specification includes every *.htm file and every directory under /common/crawlarea.
doc_spec_xml = '<?xml version="1.0" encoding="UTF-8"?><specification><startpoint path="/common/crawlarea"><include match="*.htm" type="file"/><include match="*" type="directory"/></startpoint></specification>'
job_id = ConnectorHelpers.define_job( "Test job",
doc_spec_xml )
# Run the job
ConnectorHelpers.start_job( job_id )
# Framework abort test! Abort the job and see how long it takes for it to actually stop doing stuff
# First, wait 2 minutes for the job to get really rolling
# NOTE(review): no wait is performed in the visible code; the sleep described above appears to be missing.
# Now, abort it
ConnectorHelpers.abort_job( job_id )
# Wait to see how long it actually takes to abort the job
# Time how long wait_job_complete blocks after the abort request.
the_time = time.time()
ConnectorHelpers.wait_job_complete( job_id )
elapsed_time = time.time() - the_time;
print "It took %f seconds to abort the job" % elapsed_time
# Fail the test when the abort needed more than 120 seconds.
if elapsed_time > 120.0:
raise Exception( "Took too long for job to abort: %f seconds" % elapsed_time )
# This time, run the job to completion
ConnectorHelpers.start_job( job_id )
# Pause test! Pause the job and see how long it takes for it to actually stop doing stuff
# First, wait 3 minutes for the job to get really rolling. We can't pause the job until it is finished starting up.
# NOTE(review): no wait is performed in the visible code; the sleep described above appears to be missing.
# Now, pause it
ConnectorHelpers.pause_job( job_id )
# Wait to see how long it actually takes to pause the job
# Time how long wait_job_paused blocks after the pause request.
the_time = time.time()
ConnectorHelpers.wait_job_paused( job_id )
elapsed_time = time.time() - the_time;
print "It took %f seconds to pause the job" % elapsed_time
# Fail the test when the pause needed more than 120 seconds.
if elapsed_time > 120.0:
raise Exception( "Took too long for job to pause: %f seconds" % elapsed_time )
# Resume job
ConnectorHelpers.resume_job( job_id )
# Next, shut down ingestion for a time, to make sure the job recovers and properly completes afterwards
# Two minutes is enough time to cause serious havoc (if it's going to occur...)
# Start it back up again
# NOTE(review): the shutdown, wait and restart steps described above are not present in this copy of the file.
# When approximately the largest number of unprocessed documents are on the queue, we stop and start
# metacarta-agents twice in quick succession. We're looking for a spurious FATAL error in the log.
# NOTE(review): the stop/start calls described above are not present in this copy of the file.
# Check the log for a FATAL error
results = ConnectorHelpers.read_log( "FATAL:" )
# Any entry returned by read_log fails the test.
if len(results) > 0:
raise Exception("Saw FATAL error in log after quick-succession restarts")
# Now, let job complete
ConnectorHelpers.wait_job_complete( job_id )
# Wait until ingest has caught up
ConnectorHelpers.wait_for_ingest( timeout=5000 )
# See if we can find all of the documents we just ingested.
# Every leaf folder /common/crawlarea/<a>/<b>/<c>/<d> is expected to contain
# largefiles/000.htm .. largefiles/009.htm; each file is looked up by the word
# paired with it below (the word's position is the file number) plus its url.
search_words = ( "divestment", "wildlife", "visitors", "fishery", "concession",
                 "helicopter", "moratorium", "diversified", "renegotiated", "wilderness" )
for level0 in range(10):
    for level1 in range(10):
        for level2 in range(10):
            for level3 in range(10):
                leaf = ( level0, level1, level2, level3 )
                for file_number, word in enumerate( search_words ):
                    query = "%s url:file:/common/crawlarea/%d/%d/%d/%d/largefiles/%03d.htm" % ( (word,) + leaf + (file_number,) )
                    # The third argument is the same pattern for every leaf; only the file number varies.
                    expected_pattern = "common/crawlarea/[0-9]/[0-9]/[0-9]/[0-9]/largefiles/%03d\\.htm" % file_number
                    ConnectorHelpers.search_exists_check( [ query ], None, expected_pattern )
# Success: done
print("Done ingestion load test.")

print("Performing maintenance/search test")

# Fire up maintenance operation in a separate thread.
# run_maintenance_thread reports a failure by calling append() on the object it
# is given, so it has to receive a list - a str has no append() and the error
# would have been lost.
maintenance_errors = []
mt = run_maintenance_thread(maintenance_errors)
# NOTE(review): start() was absent from this copy of the file; without it
# isAlive() is False on the first pass and the test reports itself invalid.
mt.start()

# Perform searches every .25 second until maintenance operation is complete
# search_count is kept from the original; nothing in the visible code updates or reads it.
search_count = 0
fail_count = 0
good_count = 0
saw_maintenance_message = False
while True:
    # If maintenance complete, break
    if not mt.isAlive():
        break
    # A request counts as good when invoke_curl returns and as failed when it raises.
    try:
        ConnectorHelpers.invoke_curl("http://localhost:8180/authorityservice/UserACLs?username=foo")
        good_count += 1
    except Exception:
        fail_count += 1
    # Now, check the UI also to be sure we show maintenance message at some point...
    if not saw_maintenance_message:
        saw_maintenance_message = check_for_maintenance_message_ui( username, password )
    time.sleep( 0.25 )

if fail_count > 0:
    raise Exception("A response indicated a time when tomcat was not available")
if good_count <= 1:
    raise Exception("Test invalid; unable to perform sufficient searches while postgresql maintenance ongoing")
if not saw_maintenance_message:
    raise Exception("Did not see maintenance message any time during maintenance!")
# The thread has finished by now, so the error list is complete.
if maintenance_errors:
    raise Exception("Maintenance script had an error: %s" % "; ".join(maintenance_errors))
print("Done maintenance/search test")
# PHASE 1.5: Look for what happens on a radically changed crawl, to be sure we properly handle the cleanup of old docs
# Note well - this will extend the time of the test somewhat, because only one thread does document cleanup at termination time
# The new specification matches *.skip files instead of *.htm, so none of the
# previously ingested documents are included any more.
new_doc_spec_xml = '<?xml version="1.0" encoding="UTF-8"?><specification><startpoint path="/common/crawlarea"><include match="*.skip" type="file"/><include match="*" type="directory"/></startpoint></specification>'
ConnectorHelpers.change_job_doc_spec( job_id, new_doc_spec_xml )
# Run the job again; the job should scan directories, but find no files and almost immediately enter the "terminating" state, while it cleans up the stuff ingested before
ConnectorHelpers.start_job( job_id )
# Poll the job status until it reads "terminating". Exactly one job must be
# reported, and the only statuses tolerated on the way are "starting up" and
# "running"; anything else fails the test.
while True:
    results = ConnectorHelpers.list_job_statuses_api( )
    if len(results) != 1:
        raise Exception("Unexpected number of jobs have a status! Expected: 1, found: %d" % len(results))
    job_status = results[0]["status"]
    if job_status == "terminating":
        # Restored loop exit: the branch had no body in this copy of the file.
        break
    if job_status not in ( "starting up", "running" ):
        raise Exception("Unexpected job status: %s" % job_status)
    # NOTE(review): no delay between polls is visible in the original; consider
    # a short time.sleep here if one was lost with the other missing lines.

# Wait for cleanup to finish
ConnectorHelpers.wait_job_complete( job_id )
# PHASE 2: Cleanup test
print("Cleanup Load Test.")
ConnectorHelpers.delete_job( job_id )
print("...job delete request sent")
# If broken, the following sequence of activity will cause the job to be stuck in "cleaning up" forever.
# NOTE(review): only the wait below is visible; any further steps of that
# sequence may have been lost from this copy of the file.
ConnectorHelpers.wait_job_deleted( job_id )
print("...job has vanished")

# Make sure the documents all went away.
# Same queries as the ingestion check (word at position N goes with file
# largefiles/00N.htm in every leaf folder), but asserting absence this time.
search_words = ( "divestment", "wildlife", "visitors", "fishery", "concession",
                 "helicopter", "moratorium", "diversified", "renegotiated", "wilderness" )
for level0 in range(10):
    for level1 in range(10):
        for level2 in range(10):
            for level3 in range(10):
                leaf = ( level0, level1, level2, level3 )
                for file_number, word in enumerate( search_words ):
                    query = "%s url:file:/common/crawlarea/%d/%d/%d/%d/largefiles/%03d.htm" % ( (word,) + leaf + (file_number,) )
                    expected_pattern = "common/crawlarea/[0-9]/[0-9]/[0-9]/[0-9]/largefiles/%03d\\.htm" % file_number
                    ConnectorHelpers.search_nonexists_check( [ query ], None, expected_pattern )

print("Done Cleanup Load Test.")

# Undo the setup in reverse: repository connection, output connection,
# crawler user, then the connector environment itself.
ConnectorHelpers.delete_repositoryconnection( "FileSystem" )
ConnectorHelpers.delete_gts_outputconnection( )
ConnectorHelpers.delete_crawler_user( username )
ConnectorHelpers.teardown_connector_environment( )

print("Load ConnectorFramework tests PASSED")