gremlin-go: fix deadlock on (*DriverRemoteConnection).Close
Further analysis shows that apache/tinkerpop#1734 is not the right fix. A
deadlock can still happen in (*gremlinServerWSProtocol).readLoop if
errorCallback is called before the connection protocol is closed and, because
of this, (*gremlinServerWSProtocol).close calls
(*gremlinServerWSProtocol).wg.Wait.
This commit adds the parameter "wait" to protocol.close. So, the call to
(*gremlinServerWSProtocol).wg.Wait can be skipped when called from within an
error callback. Also, the test that was introduced has been updated.
diff --git a/gremlin-go/driver/connection.go b/gremlin-go/driver/connection.go
index b23ddfe..632385d 100644
--- a/gremlin-go/driver/connection.go
+++ b/gremlin-go/driver/connection.go
@@ -55,8 +55,10 @@
func (connection *connection) errorCallback() {
connection.logHandler.log(Error, errorCallback)
connection.state = closedDueToError
- err := connection.protocol.close()
- if err != nil {
+
+ // This callback is called from within protocol.readLoop. Therefore,
+ // it cannot wait for it to finish to avoid a deadlock.
+ if err := connection.protocol.close(false); err != nil {
connection.logHandler.logf(Error, failedToCloseInErrorCallback, err.Error())
}
}
@@ -68,7 +70,7 @@
connection.logHandler.log(Info, closeConnection)
var err error
if connection.protocol != nil {
- err = connection.protocol.close()
+ err = connection.protocol.close(true)
}
connection.state = closed
return err
diff --git a/gremlin-go/driver/protocol.go b/gremlin-go/driver/protocol.go
index 2c62716..b23f5d7 100644
--- a/gremlin-go/driver/protocol.go
+++ b/gremlin-go/driver/protocol.go
@@ -30,7 +30,7 @@
type protocol interface {
readLoop(resultSets *synchronizedMap, errorCallback func())
write(request *request) error
- close() (err error)
+ close(wait bool) error
}
const authenticationFailed = uint16(151)
@@ -161,19 +161,19 @@
return protocol.transporter.Write(bytes)
}
-func (protocol *gremlinServerWSProtocol) close() error {
+func (protocol *gremlinServerWSProtocol) close(wait bool) error {
+ var err error
+
protocol.mutex.Lock()
-
- if protocol.closed {
- protocol.mutex.Unlock()
- return nil
+ if !protocol.closed {
+ err = protocol.transporter.Close()
+ protocol.closed = true
}
-
- err := protocol.transporter.Close()
- protocol.closed = true
protocol.mutex.Unlock()
- protocol.wg.Wait()
+ if wait {
+ protocol.wg.Wait()
+ }
return err
}
diff --git a/gremlin-go/driver/protocol_test.go b/gremlin-go/driver/protocol_test.go
index 5d882d5..08bcbf8 100644
--- a/gremlin-go/driver/protocol_test.go
+++ b/gremlin-go/driver/protocol_test.go
@@ -41,10 +41,7 @@
assert.Nil(t, protocol)
})
- // protocol.closed is only modified by protocol.close(). If it is true
- // it means that protocol.wg.Wait() has already been called, so it
- // should not be called again.
- t.Run("Test protocol close when closed", func(t *testing.T) {
+ t.Run("Test protocol close wait", func(t *testing.T) {
wg := &sync.WaitGroup{}
protocol := &gremlinServerWSProtocol{
closed: true,
@@ -56,14 +53,39 @@
done := make(chan bool)
go func() {
- protocol.close()
+ protocol.close(true)
done <- true
}()
select {
case <-time.After(1 * time.Second):
- t.Fatal("timeout")
+ // Ok. Close must wait.
case <-done:
+ t.Fatal("protocol.close is not waiting")
+ }
+ })
+
+ t.Run("Test protocol close no wait", func(t *testing.T) {
+ wg := &sync.WaitGroup{}
+ protocol := &gremlinServerWSProtocol{
+ closed: true,
+ mutex: sync.Mutex{},
+ wg: wg,
+ }
+ wg.Add(1)
+
+ done := make(chan bool)
+
+ go func() {
+ protocol.close(false)
+ done <- true
+ }()
+
+ select {
+ case <-time.After(1 * time.Second):
+ t.Fatal("protocol.close is waiting")
+ case <-done:
+ // Ok. Close must not wait.
}
})
}