blob: 4d26c8a8dd00024310ed9c5b1ea6e5a9205ef87e [file] [log] [blame]
// Copyright 2018 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package tester
import (
"fmt"
"io/ioutil"
"net/http"
"strings"
"sync"
"github.com/coreos/etcd/functional/rpcpb"
)
// failpointStats holds crash bookkeeping for failpoint cases. The recover
// callbacks built in this file take mu while they bump an entry in crashes.
type failpointStats struct {
// mu serializes updates to crashes.
mu sync.Mutex
// crashes counts the number of crashes for a failpoint, keyed by the
// failpoint name.
crashes map[string]int
}
// fpStats is the package-level crash counter shared by every failpoint case.
// Its crashes map is (re)created by failpointFailures.
var fpStats failpointStats
// failpointFailures builds the failpoint test cases for the cluster.
//
// It asks the first member's failpoint HTTP address for the available
// failpoints, expands each non-empty name into cases via casesFromFailpoint
// using clus.Tester.FailpointCommands, and wraps every case so the failpoint
// has time to trigger: failpoints whose name contains "Snap" are wrapped in
// caseUntilSnapshot, all others in caseDelay.
//
// As a side effect the package-level crash counters (fpStats.crashes) are
// reset. An error is returned when the cluster has no members or when the
// failpoint list cannot be fetched.
func failpointFailures(clus *Cluster) (ret []Case, err error) {
	// Indexing Members[0] on an empty cluster would panic; report it instead.
	if len(clus.Members) == 0 {
		return nil, fmt.Errorf("no members to list failpoints from")
	}

	var fps []string
	fps, err = failpointPaths(clus.Members[0].FailpointHTTPAddr)
	if err != nil {
		return nil, err
	}

	// create failure objects for all failpoints
	for _, fp := range fps {
		if len(fp) == 0 {
			continue
		}

		fpFails := casesFromFailpoint(fp, clus.Tester.FailpointCommands)

		// wrap in delays so failpoint has time to trigger
		isSnapshot := strings.Contains(fp, "Snap")
		for i, fpf := range fpFails {
			if isSnapshot {
				// hack to trigger snapshot failpoints
				fpFails[i] = &caseUntilSnapshot{
					desc:      fpf.Desc(),
					rpcpbCase: rpcpb.Case_FAILPOINTS,
					Case:      fpf,
				}
			} else {
				fpFails[i] = &caseDelay{
					Case:          fpf,
					delayDuration: clus.GetCaseDelayDuration(),
				}
			}
		}
		ret = append(ret, fpFails...)
	}

	// The recover callbacks update this map while holding fpStats.mu, so the
	// reset must take the same lock to avoid racing with them.
	fpStats.mu.Lock()
	fpStats.crashes = make(map[string]int)
	fpStats.mu.Unlock()

	return ret, nil
}
// failpointPaths fetches the failpoint names exposed at endpoint.
//
// The response body is split into lines and, for every line, the text before
// the first "=" is returned as a name (the parsing assumes one "name=value"
// entry per line). Blank lines, such as the one after a trailing newline,
// yield empty names; the caller is expected to skip those.
//
// An error is returned when the request fails, when the server answers with
// a non-2xx status, or when the body cannot be read.
func failpointPaths(endpoint string) ([]string, error) {
	resp, err := http.Get(endpoint)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	// An error page must not be mistaken for a list of failpoint names.
	if resp.StatusCode/100 != 2 {
		return nil, fmt.Errorf("failed to GET failpoints at %s (%v)", endpoint, resp.Status)
	}

	body, rerr := ioutil.ReadAll(resp.Body)
	if rerr != nil {
		return nil, rerr
	}

	lines := strings.Split(string(body), "\n")
	fps := make([]string, 0, len(lines))
	for _, l := range lines {
		// Only the name before the first "=" matters; the value is dropped.
		fps = append(fps, strings.SplitN(l, "=", 2)[0])
	}
	return fps, nil
}
// casesFromFailpoint expands a single failpoint into its test cases.
//
// Failpoint commands follow FreeBSD FAIL_POINT syntax,
// e.g. panic("etcd-tester"),1*sleep(1000)->panic("etcd-tester")
//
// For every entry of failpointCommands four cases are appended, in this
// order: a caseFollower, a caseLeader, a caseQuorum and a caseAll. Each of
// them injects the command with its own inject function; all cases of the
// failpoint share one recover function.
func casesFromFailpoint(fp string, failpointCommands []string) (fs []Case) {
	recoverFn := makeRecoverFailpoint(fp)

	for _, cmd := range failpointCommands {
		injectFn := makeInjectFailpoint(fp, cmd)

		follower := &caseFollower{
			caseByFunc: caseByFunc{
				desc:          fmt.Sprintf("failpoint %q (one: %q)", fp, cmd),
				rpcpbCase:     rpcpb.Case_FAILPOINTS,
				injectMember:  injectFn,
				recoverMember: recoverFn,
			},
			last: -1,
			lead: -1,
		}

		leader := &caseLeader{
			caseByFunc: caseByFunc{
				desc:          fmt.Sprintf("failpoint %q (leader: %q)", fp, cmd),
				rpcpbCase:     rpcpb.Case_FAILPOINTS,
				injectMember:  injectFn,
				recoverMember: recoverFn,
			},
			last: -1,
			lead: -1,
		}

		quorum := &caseQuorum{
			caseByFunc: caseByFunc{
				desc:          fmt.Sprintf("failpoint %q (quorum: %q)", fp, cmd),
				rpcpbCase:     rpcpb.Case_FAILPOINTS,
				injectMember:  injectFn,
				recoverMember: recoverFn,
			},
			injected: make(map[int]struct{}),
		}

		everyone := &caseAll{
			desc:          fmt.Sprintf("failpoint %q (all: %q)", fp, cmd),
			rpcpbCase:     rpcpb.Case_FAILPOINTS,
			injectMember:  injectFn,
			recoverMember: recoverFn,
		}

		fs = append(fs, follower, leader, quorum, everyone)
	}

	return fs
}
// makeInjectFailpoint returns an inject function that sets failpoint fp to
// val on the member at the given index, using that member's failpoint HTTP
// address. The returned function reports whatever putFailpoint returns.
func makeInjectFailpoint(fp, val string) injectMemberFunc {
	return func(clus *Cluster, idx int) error {
		addr := clus.Members[idx].FailpointHTTPAddr
		return putFailpoint(addr, fp, val)
	}
}
// makeRecoverFailpoint returns a recover function for failpoint fp.
//
// The returned function first tries to delete the failpoint on the member at
// the given index. If that succeeds it returns nil. Any failure is treated
// as the member having crashed: the crash counter for fp is incremented and
// the member is restarted through recover_SIGTERM_ETCD, whose result is
// returned.
func makeRecoverFailpoint(fp string) recoverMemberFunc {
	return func(clus *Cluster, idx int) error {
		if err := delFailpoint(clus.Members[idx].FailpointHTTPAddr, fp); err == nil {
			return nil
		}

		// node not responding, likely dead from fp panic; restart
		fpStats.mu.Lock()
		// The zero-value fpStats has a nil map and writing to a nil map
		// panics, so create it on demand.
		if fpStats.crashes == nil {
			fpStats.crashes = make(map[string]int)
		}
		fpStats.crashes[fp]++
		fpStats.mu.Unlock()

		return recover_SIGTERM_ETCD(clus, idx)
	}
}
// putFailpoint sets failpoint fp to val by sending an HTTP PUT with val as
// the body to ep + "/" + fp.
//
// It returns an error when the request cannot be built (for example a
// malformed endpoint), when the request fails, or when the server answers
// with a non-2xx status.
func putFailpoint(ep, fp, val string) error {
	req, err := http.NewRequest(http.MethodPut, ep+"/"+fp, strings.NewReader(val))
	if err != nil {
		// A nil request must never reach Client.Do; it would panic.
		return err
	}

	c := http.Client{}
	resp, err := c.Do(req)
	if err != nil {
		return err
	}
	resp.Body.Close()

	if resp.StatusCode/100 != 2 {
		return fmt.Errorf("failed to PUT %s=%s at %s (%v)", fp, val, ep, resp.Status)
	}
	return nil
}
// delFailpoint removes failpoint fp by sending an HTTP DELETE to
// ep + "/" + fp.
//
// It returns an error when the request cannot be built (for example a
// malformed endpoint), when the request fails, or when the server answers
// with a non-2xx status.
func delFailpoint(ep, fp string) error {
	req, err := http.NewRequest(http.MethodDelete, ep+"/"+fp, strings.NewReader(""))
	if err != nil {
		// A nil request must never reach Client.Do; it would panic.
		return err
	}

	c := http.Client{}
	resp, err := c.Do(req)
	if err != nil {
		return err
	}
	resp.Body.Close()

	if resp.StatusCode/100 != 2 {
		return fmt.Errorf("failed to DELETE %s at %s (%v)", fp, ep, resp.Status)
	}
	return nil
}