| <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> |
| <html lang="en"> |
| <head> |
| <title>Source code</title> |
| <link rel="stylesheet" type="text/css" href="../../../../../../../stylesheet.css" title="Style"> |
| </head> |
| <body> |
| <div class="sourceContainer"> |
| <pre><span class="sourceLineNo">001</span>/*<a name="line.1"></a> |
| <span class="sourceLineNo">002</span> * Licensed to the Apache Software Foundation (ASF) under one<a name="line.2"></a> |
| <span class="sourceLineNo">003</span> * or more contributor license agreements. See the NOTICE file<a name="line.3"></a> |
| <span class="sourceLineNo">004</span> * distributed with this work for additional information<a name="line.4"></a> |
| <span class="sourceLineNo">005</span> * regarding copyright ownership. The ASF licenses this file<a name="line.5"></a> |
| <span class="sourceLineNo">006</span> * to you under the Apache License, Version 2.0 (the<a name="line.6"></a> |
| <span class="sourceLineNo">007</span> * "License"); you may not use this file except in compliance<a name="line.7"></a> |
| <span class="sourceLineNo">008</span> * with the License. You may obtain a copy of the License at<a name="line.8"></a> |
| <span class="sourceLineNo">009</span> *<a name="line.9"></a> |
| <span class="sourceLineNo">010</span> * http://www.apache.org/licenses/LICENSE-2.0<a name="line.10"></a> |
| <span class="sourceLineNo">011</span> *<a name="line.11"></a> |
| <span class="sourceLineNo">012</span> * Unless required by applicable law or agreed to in writing, software<a name="line.12"></a> |
| <span class="sourceLineNo">013</span> * distributed under the License is distributed on an "AS IS" BASIS,<a name="line.13"></a> |
| <span class="sourceLineNo">014</span> * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.<a name="line.14"></a> |
| <span class="sourceLineNo">015</span> * See the License for the specific language governing permissions and<a name="line.15"></a> |
| <span class="sourceLineNo">016</span> * limitations under the License.<a name="line.16"></a> |
| <span class="sourceLineNo">017</span> */<a name="line.17"></a> |
| <span class="sourceLineNo">018</span>package org.apache.hadoop.hbase.master.assignment;<a name="line.18"></a> |
| <span class="sourceLineNo">019</span><a name="line.19"></a> |
| <span class="sourceLineNo">020</span>import edu.umd.cs.findbugs.annotations.NonNull;<a name="line.20"></a> |
| <span class="sourceLineNo">021</span>import java.io.IOException;<a name="line.21"></a> |
| <span class="sourceLineNo">022</span>import java.util.ArrayList;<a name="line.22"></a> |
| <span class="sourceLineNo">023</span>import java.util.Collection;<a name="line.23"></a> |
| <span class="sourceLineNo">024</span>import java.util.Collections;<a name="line.24"></a> |
| <span class="sourceLineNo">025</span>import java.util.HashMap;<a name="line.25"></a> |
| <span class="sourceLineNo">026</span>import java.util.HashSet;<a name="line.26"></a> |
| <span class="sourceLineNo">027</span>import java.util.List;<a name="line.27"></a> |
| <span class="sourceLineNo">028</span>import java.util.Map;<a name="line.28"></a> |
| <span class="sourceLineNo">029</span>import java.util.Set;<a name="line.29"></a> |
| <span class="sourceLineNo">030</span>import java.util.concurrent.CompletableFuture;<a name="line.30"></a> |
| <span class="sourceLineNo">031</span>import java.util.concurrent.Future;<a name="line.31"></a> |
| <span class="sourceLineNo">032</span>import java.util.concurrent.TimeUnit;<a name="line.32"></a> |
| <span class="sourceLineNo">033</span>import java.util.concurrent.atomic.AtomicBoolean;<a name="line.33"></a> |
| <span class="sourceLineNo">034</span>import java.util.concurrent.locks.Condition;<a name="line.34"></a> |
| <span class="sourceLineNo">035</span>import java.util.concurrent.locks.ReentrantLock;<a name="line.35"></a> |
| <span class="sourceLineNo">036</span>import java.util.stream.Collectors;<a name="line.36"></a> |
| <span class="sourceLineNo">037</span>import java.util.stream.Stream;<a name="line.37"></a> |
| <span class="sourceLineNo">038</span>import org.apache.hadoop.conf.Configuration;<a name="line.38"></a> |
| <span class="sourceLineNo">039</span>import org.apache.hadoop.hbase.CatalogFamilyFormat;<a name="line.39"></a> |
| <span class="sourceLineNo">040</span>import org.apache.hadoop.hbase.DoNotRetryIOException;<a name="line.40"></a> |
| <span class="sourceLineNo">041</span>import org.apache.hadoop.hbase.HBaseIOException;<a name="line.41"></a> |
| <span class="sourceLineNo">042</span>import org.apache.hadoop.hbase.HConstants;<a name="line.42"></a> |
| <span class="sourceLineNo">043</span>import org.apache.hadoop.hbase.PleaseHoldException;<a name="line.43"></a> |
| <span class="sourceLineNo">044</span>import org.apache.hadoop.hbase.ServerName;<a name="line.44"></a> |
| <span class="sourceLineNo">045</span>import org.apache.hadoop.hbase.TableName;<a name="line.45"></a> |
| <span class="sourceLineNo">046</span>import org.apache.hadoop.hbase.UnknownRegionException;<a name="line.46"></a> |
| <span class="sourceLineNo">047</span>import org.apache.hadoop.hbase.client.DoNotRetryRegionException;<a name="line.47"></a> |
| <span class="sourceLineNo">048</span>import org.apache.hadoop.hbase.client.MasterSwitchType;<a name="line.48"></a> |
| <span class="sourceLineNo">049</span>import org.apache.hadoop.hbase.client.RegionInfo;<a name="line.49"></a> |
| <span class="sourceLineNo">050</span>import org.apache.hadoop.hbase.client.RegionInfoBuilder;<a name="line.50"></a> |
| <span class="sourceLineNo">051</span>import org.apache.hadoop.hbase.client.RegionReplicaUtil;<a name="line.51"></a> |
| <span class="sourceLineNo">052</span>import org.apache.hadoop.hbase.client.RegionStatesCount;<a name="line.52"></a> |
| <span class="sourceLineNo">053</span>import org.apache.hadoop.hbase.client.Result;<a name="line.53"></a> |
| <span class="sourceLineNo">054</span>import org.apache.hadoop.hbase.client.ResultScanner;<a name="line.54"></a> |
| <span class="sourceLineNo">055</span>import org.apache.hadoop.hbase.client.Scan;<a name="line.55"></a> |
| <span class="sourceLineNo">056</span>import org.apache.hadoop.hbase.client.TableDescriptor;<a name="line.56"></a> |
| <span class="sourceLineNo">057</span>import org.apache.hadoop.hbase.client.TableState;<a name="line.57"></a> |
| <span class="sourceLineNo">058</span>import org.apache.hadoop.hbase.exceptions.UnexpectedStateException;<a name="line.58"></a> |
| <span class="sourceLineNo">059</span>import org.apache.hadoop.hbase.favored.FavoredNodesManager;<a name="line.59"></a> |
| <span class="sourceLineNo">060</span>import org.apache.hadoop.hbase.favored.FavoredNodesPromoter;<a name="line.60"></a> |
| <span class="sourceLineNo">061</span>import org.apache.hadoop.hbase.master.LoadBalancer;<a name="line.61"></a> |
| <span class="sourceLineNo">062</span>import org.apache.hadoop.hbase.master.MasterServices;<a name="line.62"></a> |
| <span class="sourceLineNo">063</span>import org.apache.hadoop.hbase.master.MetricsAssignmentManager;<a name="line.63"></a> |
| <span class="sourceLineNo">064</span>import org.apache.hadoop.hbase.master.RegionPlan;<a name="line.64"></a> |
| <span class="sourceLineNo">065</span>import org.apache.hadoop.hbase.master.RegionState;<a name="line.65"></a> |
| <span class="sourceLineNo">066</span>import org.apache.hadoop.hbase.master.RegionState.State;<a name="line.66"></a> |
| <span class="sourceLineNo">067</span>import org.apache.hadoop.hbase.master.ServerManager;<a name="line.67"></a> |
| <span class="sourceLineNo">068</span>import org.apache.hadoop.hbase.master.TableStateManager;<a name="line.68"></a> |
| <span class="sourceLineNo">069</span>import org.apache.hadoop.hbase.master.balancer.FavoredStochasticBalancer;<a name="line.69"></a> |
| <span class="sourceLineNo">070</span>import org.apache.hadoop.hbase.master.procedure.HBCKServerCrashProcedure;<a name="line.70"></a> |
| <span class="sourceLineNo">071</span>import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;<a name="line.71"></a> |
| <span class="sourceLineNo">072</span>import org.apache.hadoop.hbase.master.procedure.MasterProcedureScheduler;<a name="line.72"></a> |
| <span class="sourceLineNo">073</span>import org.apache.hadoop.hbase.master.procedure.ProcedureSyncWait;<a name="line.73"></a> |
| <span class="sourceLineNo">074</span>import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;<a name="line.74"></a> |
| <span class="sourceLineNo">075</span>import org.apache.hadoop.hbase.master.procedure.TruncateRegionProcedure;<a name="line.75"></a> |
| <span class="sourceLineNo">076</span>import org.apache.hadoop.hbase.master.region.MasterRegion;<a name="line.76"></a> |
| <span class="sourceLineNo">077</span>import org.apache.hadoop.hbase.procedure2.Procedure;<a name="line.77"></a> |
| <span class="sourceLineNo">078</span>import org.apache.hadoop.hbase.procedure2.ProcedureEvent;<a name="line.78"></a> |
| <span class="sourceLineNo">079</span>import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;<a name="line.79"></a> |
| <span class="sourceLineNo">080</span>import org.apache.hadoop.hbase.procedure2.ProcedureInMemoryChore;<a name="line.80"></a> |
| <span class="sourceLineNo">081</span>import org.apache.hadoop.hbase.procedure2.util.StringUtils;<a name="line.81"></a> |
| <span class="sourceLineNo">082</span>import org.apache.hadoop.hbase.regionserver.SequenceId;<a name="line.82"></a> |
| <span class="sourceLineNo">083</span>import org.apache.hadoop.hbase.rsgroup.RSGroupBasedLoadBalancer;<a name="line.83"></a> |
| <span class="sourceLineNo">084</span>import org.apache.hadoop.hbase.util.Bytes;<a name="line.84"></a> |
| <span class="sourceLineNo">085</span>import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;<a name="line.85"></a> |
| <span class="sourceLineNo">086</span>import org.apache.hadoop.hbase.util.FutureUtils;<a name="line.86"></a> |
| <span class="sourceLineNo">087</span>import org.apache.hadoop.hbase.util.Pair;<a name="line.87"></a> |
| <span class="sourceLineNo">088</span>import org.apache.hadoop.hbase.util.Threads;<a name="line.88"></a> |
| <span class="sourceLineNo">089</span>import org.apache.hadoop.hbase.util.VersionInfo;<a name="line.89"></a> |
| <span class="sourceLineNo">090</span>import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;<a name="line.90"></a> |
| <span class="sourceLineNo">091</span>import org.apache.hadoop.hbase.zookeeper.ZKWatcher;<a name="line.91"></a> |
| <span class="sourceLineNo">092</span>import org.apache.yetus.audience.InterfaceAudience;<a name="line.92"></a> |
| <span class="sourceLineNo">093</span>import org.apache.zookeeper.KeeperException;<a name="line.93"></a> |
| <span class="sourceLineNo">094</span>import org.slf4j.Logger;<a name="line.94"></a> |
| <span class="sourceLineNo">095</span>import org.slf4j.LoggerFactory;<a name="line.95"></a> |
| <span class="sourceLineNo">096</span><a name="line.96"></a> |
| <span class="sourceLineNo">097</span>import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;<a name="line.97"></a> |
| <span class="sourceLineNo">098</span>import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition;<a name="line.98"></a> |
| <span class="sourceLineNo">099</span>import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;<a name="line.99"></a> |
| <span class="sourceLineNo">100</span>import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest;<a name="line.100"></a> |
| <span class="sourceLineNo">101</span>import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse;<a name="line.101"></a> |
| <span class="sourceLineNo">102</span><a name="line.102"></a> |
| <span class="sourceLineNo">103</span>/**<a name="line.103"></a> |
| <span class="sourceLineNo">104</span> * The AssignmentManager is the coordinator for region assign/unassign operations.<a name="line.104"></a> |
| <span class="sourceLineNo">105</span> * <ul><a name="line.105"></a> |
| <span class="sourceLineNo">106</span> * <li>In-memory states of regions and servers are stored in {@link RegionStates}.</li><a name="line.106"></a> |
| <span class="sourceLineNo">107</span> * <li>hbase:meta state updates are handled by {@link RegionStateStore}.</li><a name="line.107"></a> |
| <span class="sourceLineNo">108</span> * </ul><a name="line.108"></a> |
| <span class="sourceLineNo">109</span> * Regions are created by CreateTable, Split, Merge. Regions are deleted by DeleteTable, Split,<a name="line.109"></a> |
| <span class="sourceLineNo">110</span> * Merge. Assigns are triggered by CreateTable, EnableTable, Split, Merge, ServerCrash. Unassigns<a name="line.110"></a> |
| <span class="sourceLineNo">111</span> * are triggered by DisableTable, Split, Merge.<a name="line.111"></a> |
| <span class="sourceLineNo">112</span> */<a name="line.112"></a> |
| <span class="sourceLineNo">113</span>@InterfaceAudience.Private<a name="line.113"></a> |
| <span class="sourceLineNo">114</span>public class AssignmentManager {<a name="line.114"></a> |
| <span class="sourceLineNo">115</span> private static final Logger LOG = LoggerFactory.getLogger(AssignmentManager.class);<a name="line.115"></a> |
| <span class="sourceLineNo">116</span><a name="line.116"></a> |
| <span class="sourceLineNo">117</span> // TODO: AMv2<a name="line.117"></a> |
| <span class="sourceLineNo">118</span> // - handle region migration from hbase1 to hbase2.<a name="line.118"></a> |
| <span class="sourceLineNo">119</span> // - handle sys table assignment first (e.g. acl, namespace)<a name="line.119"></a> |
| <span class="sourceLineNo">120</span> // - handle table priorities<a name="line.120"></a> |
| <span class="sourceLineNo">121</span> // - If we get a ServerBusyException while trying to update hbase:meta, we abort the Master<a name="line.121"></a> |
| <span class="sourceLineNo">122</span> // See updateRegionLocation in RegionStateStore.<a name="line.122"></a> |
| <span class="sourceLineNo">123</span> //<a name="line.123"></a> |
| <span class="sourceLineNo">124</span> // See also<a name="line.124"></a> |
| <span class="sourceLineNo">125</span> // https://docs.google.com/document/d/1eVKa7FHdeoJ1-9o8yZcOTAQbv0u0bblBlCCzVSIn69g/edit#heading=h.ystjyrkbtoq5<a name="line.125"></a> |
| <span class="sourceLineNo">126</span> // for other TODOs.<a name="line.126"></a> |
| <span class="sourceLineNo">127</span><a name="line.127"></a> |
| <span class="sourceLineNo">128</span> public static final String BOOTSTRAP_THREAD_POOL_SIZE_CONF_KEY =<a name="line.128"></a> |
| <span class="sourceLineNo">129</span> "hbase.assignment.bootstrap.thread.pool.size";<a name="line.129"></a> |
| <span class="sourceLineNo">130</span><a name="line.130"></a> |
| <span class="sourceLineNo">131</span> public static final String ASSIGN_DISPATCH_WAIT_MSEC_CONF_KEY =<a name="line.131"></a> |
| <span class="sourceLineNo">132</span> "hbase.assignment.dispatch.wait.msec";<a name="line.132"></a> |
| <span class="sourceLineNo">133</span> private static final int DEFAULT_ASSIGN_DISPATCH_WAIT_MSEC = 150;<a name="line.133"></a> |
| <span class="sourceLineNo">134</span><a name="line.134"></a> |
| <span class="sourceLineNo">135</span> public static final String ASSIGN_DISPATCH_WAITQ_MAX_CONF_KEY =<a name="line.135"></a> |
| <span class="sourceLineNo">136</span> "hbase.assignment.dispatch.wait.queue.max.size";<a name="line.136"></a> |
| <span class="sourceLineNo">137</span> private static final int DEFAULT_ASSIGN_DISPATCH_WAITQ_MAX = 100;<a name="line.137"></a> |
| <span class="sourceLineNo">138</span><a name="line.138"></a> |
| <span class="sourceLineNo">139</span> public static final String RIT_CHORE_INTERVAL_MSEC_CONF_KEY =<a name="line.139"></a> |
| <span class="sourceLineNo">140</span> "hbase.assignment.rit.chore.interval.msec";<a name="line.140"></a> |
| <span class="sourceLineNo">141</span> private static final int DEFAULT_RIT_CHORE_INTERVAL_MSEC = 60 * 1000;<a name="line.141"></a> |
| <span class="sourceLineNo">142</span><a name="line.142"></a> |
| <span class="sourceLineNo">143</span> public static final String DEAD_REGION_METRIC_CHORE_INTERVAL_MSEC_CONF_KEY =<a name="line.143"></a> |
| <span class="sourceLineNo">144</span> "hbase.assignment.dead.region.metric.chore.interval.msec";<a name="line.144"></a> |
| <span class="sourceLineNo">145</span> private static final int DEFAULT_DEAD_REGION_METRIC_CHORE_INTERVAL_MSEC = 120 * 1000;<a name="line.145"></a> |
| <span class="sourceLineNo">146</span><a name="line.146"></a> |
| <span class="sourceLineNo">147</span> public static final String ASSIGN_MAX_ATTEMPTS = "hbase.assignment.maximum.attempts";<a name="line.147"></a> |
| <span class="sourceLineNo">148</span> private static final int DEFAULT_ASSIGN_MAX_ATTEMPTS = Integer.MAX_VALUE;<a name="line.148"></a> |
| <span class="sourceLineNo">149</span><a name="line.149"></a> |
| <span class="sourceLineNo">150</span> public static final String ASSIGN_RETRY_IMMEDIATELY_MAX_ATTEMPTS =<a name="line.150"></a> |
| <span class="sourceLineNo">151</span> "hbase.assignment.retry.immediately.maximum.attempts";<a name="line.151"></a> |
| <span class="sourceLineNo">152</span> private static final int DEFAULT_ASSIGN_RETRY_IMMEDIATELY_MAX_ATTEMPTS = 3;<a name="line.152"></a> |
| <span class="sourceLineNo">153</span><a name="line.153"></a> |
| <span class="sourceLineNo">154</span> /** Region in Transition metrics threshold time */<a name="line.154"></a> |
| <span class="sourceLineNo">155</span> public static final String METRICS_RIT_STUCK_WARNING_THRESHOLD =<a name="line.155"></a> |
| <span class="sourceLineNo">156</span> "hbase.metrics.rit.stuck.warning.threshold";<a name="line.156"></a> |
| <span class="sourceLineNo">157</span> private static final int DEFAULT_RIT_STUCK_WARNING_THRESHOLD = 60 * 1000;<a name="line.157"></a> |
| <span class="sourceLineNo">158</span> public static final String UNEXPECTED_STATE_REGION = "Unexpected state for ";<a name="line.158"></a> |
| <span class="sourceLineNo">159</span><a name="line.159"></a> |
| <span class="sourceLineNo">160</span> public static final String FORCE_REGION_RETAINMENT = "hbase.master.scp.retain.assignment.force";<a name="line.160"></a> |
| <span class="sourceLineNo">161</span><a name="line.161"></a> |
| <span class="sourceLineNo">162</span> public static final boolean DEFAULT_FORCE_REGION_RETAINMENT = false;<a name="line.162"></a> |
| <span class="sourceLineNo">163</span><a name="line.163"></a> |
| <span class="sourceLineNo">164</span> /** The wait time in millis before checking again if the region's previous RS is back online */<a name="line.164"></a> |
| <span class="sourceLineNo">165</span> public static final String FORCE_REGION_RETAINMENT_WAIT_INTERVAL =<a name="line.165"></a> |
| <span class="sourceLineNo">166</span> "hbase.master.scp.retain.assignment.force.wait-interval";<a name="line.166"></a> |
| <span class="sourceLineNo">167</span><a name="line.167"></a> |
| <span class="sourceLineNo">168</span> public static final long DEFAULT_FORCE_REGION_RETAINMENT_WAIT_INTERVAL = 50;<a name="line.168"></a> |
| <span class="sourceLineNo">169</span><a name="line.169"></a> |
| <span class="sourceLineNo">170</span> /**<a name="line.170"></a> |
| <span class="sourceLineNo">171</span> * The number of times to check if the region's previous RS is back online, before giving up and<a name="line.171"></a> |
| <span class="sourceLineNo">172</span> * proceeding with assignment on a new RS<a name="line.172"></a> |
| <span class="sourceLineNo">173</span> */<a name="line.173"></a> |
| <span class="sourceLineNo">174</span> public static final String FORCE_REGION_RETAINMENT_RETRIES =<a name="line.174"></a> |
| <span class="sourceLineNo">175</span> "hbase.master.scp.retain.assignment.force.retries";<a name="line.175"></a> |
| <span class="sourceLineNo">176</span><a name="line.176"></a> |
| <span class="sourceLineNo">177</span> public static final int DEFAULT_FORCE_REGION_RETAINMENT_RETRIES = 600;<a name="line.177"></a> |
| <span class="sourceLineNo">178</span><a name="line.178"></a> |
| <span class="sourceLineNo">179</span> private final ProcedureEvent<?> metaAssignEvent = new ProcedureEvent<>("meta assign");<a name="line.179"></a> |
| <span class="sourceLineNo">180</span> private final ProcedureEvent<?> metaLoadEvent = new ProcedureEvent<>("meta load");<a name="line.180"></a> |
| <span class="sourceLineNo">181</span><a name="line.181"></a> |
| <span class="sourceLineNo">182</span> private final MetricsAssignmentManager metrics;<a name="line.182"></a> |
| <span class="sourceLineNo">183</span> private final RegionInTransitionChore ritChore;<a name="line.183"></a> |
| <span class="sourceLineNo">184</span> private final DeadServerMetricRegionChore deadMetricChore;<a name="line.184"></a> |
| <span class="sourceLineNo">185</span> private final MasterServices master;<a name="line.185"></a> |
| <span class="sourceLineNo">186</span><a name="line.186"></a> |
| <span class="sourceLineNo">187</span> private final AtomicBoolean running = new AtomicBoolean(false);<a name="line.187"></a> |
| <span class="sourceLineNo">188</span> private final RegionStates regionStates = new RegionStates();<a name="line.188"></a> |
| <span class="sourceLineNo">189</span> private final RegionStateStore regionStateStore;<a name="line.189"></a> |
| <span class="sourceLineNo">190</span><a name="line.190"></a> |
| <span class="sourceLineNo">191</span> /**<a name="line.191"></a> |
| <span class="sourceLineNo">192</span> * When the operator uses this configuration option, any version between the current cluster<a name="line.192"></a> |
| <span class="sourceLineNo">193</span> * version and the value of "hbase.min.version.move.system.tables" does not trigger any<a name="line.193"></a> |
| <span class="sourceLineNo">194</span> * auto-region movement. Auto-region movement here refers to auto-migration of system table<a name="line.194"></a> |
| <span class="sourceLineNo">195</span> * regions to newer server versions. It is assumed that the configured range of versions does not<a name="line.195"></a> |
| <span class="sourceLineNo">196</span> * require special handling of moving system table regions to higher versioned RegionServer. This<a name="line.196"></a> |
| <span class="sourceLineNo">197</span> * auto-migration is done by {@link #checkIfShouldMoveSystemRegionAsync()}. Example: Let's assume<a name="line.197"></a> |
| <span class="sourceLineNo">198</span> * the cluster is on version 1.4.0 and we have set "hbase.min.version.move.system.tables" as<a name="line.198"></a> |
| <span class="sourceLineNo">199</span> * "2.0.0". Now if we upgrade one RegionServer on 1.4.0 cluster to 1.6.0 (< 2.0.0), then<a name="line.199"></a> |
| <span class="sourceLineNo">200</span> * AssignmentManager will not move hbase:meta, hbase:namespace and other system table regions to<a name="line.200"></a> |
| <span class="sourceLineNo">201</span> * newly brought up RegionServer 1.6.0 as part of auto-migration. However, if we upgrade one<a name="line.201"></a> |
| <span class="sourceLineNo">202</span> * RegionServer on 1.4.0 cluster to 2.2.0 (> 2.0.0), then AssignmentManager will move all system<a name="line.202"></a> |
| <span class="sourceLineNo">203</span> * table regions to newly brought up RegionServer 2.2.0 as part of auto-migration done by<a name="line.203"></a> |
| <span class="sourceLineNo">204</span> * {@link #checkIfShouldMoveSystemRegionAsync()}. "hbase.min.version.move.system.tables" is<a name="line.204"></a> |
| <span class="sourceLineNo">205</span> * introduced as part of HBASE-22923.<a name="line.205"></a> |
| <span class="sourceLineNo">206</span> */<a name="line.206"></a> |
| <span class="sourceLineNo">207</span> private final String minVersionToMoveSysTables;<a name="line.207"></a> |
| <span class="sourceLineNo">208</span><a name="line.208"></a> |
| <span class="sourceLineNo">209</span> private static final String MIN_VERSION_MOVE_SYS_TABLES_CONFIG =<a name="line.209"></a> |
| <span class="sourceLineNo">210</span> "hbase.min.version.move.system.tables";<a name="line.210"></a> |
| <span class="sourceLineNo">211</span> private static final String DEFAULT_MIN_VERSION_MOVE_SYS_TABLES_CONFIG = "";<a name="line.211"></a> |
| <span class="sourceLineNo">212</span><a name="line.212"></a> |
| <span class="sourceLineNo">213</span> private final Map<ServerName, Set<byte[]>> rsReports = new HashMap<>();<a name="line.213"></a> |
| <span class="sourceLineNo">214</span><a name="line.214"></a> |
| <span class="sourceLineNo">215</span> private final boolean shouldAssignRegionsWithFavoredNodes;<a name="line.215"></a> |
| <span class="sourceLineNo">216</span> private final int assignDispatchWaitQueueMaxSize;<a name="line.216"></a> |
| <span class="sourceLineNo">217</span> private final int assignDispatchWaitMillis;<a name="line.217"></a> |
| <span class="sourceLineNo">218</span> private final int assignMaxAttempts;<a name="line.218"></a> |
| <span class="sourceLineNo">219</span> private final int assignRetryImmediatelyMaxAttempts;<a name="line.219"></a> |
| <span class="sourceLineNo">220</span><a name="line.220"></a> |
| <span class="sourceLineNo">221</span> private final MasterRegion masterRegion;<a name="line.221"></a> |
| <span class="sourceLineNo">222</span><a name="line.222"></a> |
| <span class="sourceLineNo">223</span> private final Object checkIfShouldMoveSystemRegionLock = new Object();<a name="line.223"></a> |
| <span class="sourceLineNo">224</span><a name="line.224"></a> |
| <span class="sourceLineNo">225</span> private Thread assignThread;<a name="line.225"></a> |
| <span class="sourceLineNo">226</span><a name="line.226"></a> |
| <span class="sourceLineNo">227</span> private final boolean forceRegionRetainment;<a name="line.227"></a> |
| <span class="sourceLineNo">228</span><a name="line.228"></a> |
| <span class="sourceLineNo">229</span> private final long forceRegionRetainmentWaitInterval;<a name="line.229"></a> |
| <span class="sourceLineNo">230</span><a name="line.230"></a> |
| <span class="sourceLineNo">231</span> private final int forceRegionRetainmentRetries;<a name="line.231"></a> |
| <span class="sourceLineNo">232</span><a name="line.232"></a> |
/**
 * Creates an AssignmentManager whose {@link RegionStateStore} is built here from the supplied
 * master services and master region; all remaining setup is delegated to the three-argument
 * constructor.
 * @param master       master services handle, also passed to the new {@link RegionStateStore}
 * @param masterRegion master region handle, also passed to the new {@link RegionStateStore}
 */
public AssignmentManager(MasterServices master, MasterRegion masterRegion) {
  this(master, masterRegion, new RegionStateStore(master, masterRegion));
}
| <span class="sourceLineNo">236</span><a name="line.236"></a> |
/**
 * Creates an AssignmentManager that uses the supplied {@link RegionStateStore} and reads all of
 * its tunables from {@code master.getConfiguration()}.
 * @param master       master services handle; its configuration supplies every setting below
 * @param masterRegion master region handle, stored as-is
 * @param stateStore   region state store to use, stored as-is
 */
AssignmentManager(MasterServices master, MasterRegion masterRegion, RegionStateStore stateStore) {
  this.master = master;
  this.masterRegion = masterRegion;
  this.regionStateStore = stateStore;
  this.metrics = new MetricsAssignmentManager();

  final Configuration conf = master.getConfiguration();

  // Favored nodes are only read when the configured load balancer class is
  // FavoredStochasticBalancer or one of its subclasses.
  final Class<?> balancerClass =
    conf.getClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS, Object.class);
  this.shouldAssignRegionsWithFavoredNodes =
    FavoredStochasticBalancer.class.isAssignableFrom(balancerClass);

  // Assign dispatch settings.
  this.assignDispatchWaitMillis =
    conf.getInt(ASSIGN_DISPATCH_WAIT_MSEC_CONF_KEY, DEFAULT_ASSIGN_DISPATCH_WAIT_MSEC);
  this.assignDispatchWaitQueueMaxSize =
    conf.getInt(ASSIGN_DISPATCH_WAITQ_MAX_CONF_KEY, DEFAULT_ASSIGN_DISPATCH_WAITQ_MAX);

  // Retry limits; the overall attempt limit is clamped so it is never below 1.
  final int configuredMaxAttempts = conf.getInt(ASSIGN_MAX_ATTEMPTS, DEFAULT_ASSIGN_MAX_ATTEMPTS);
  this.assignMaxAttempts = Math.max(1, configuredMaxAttempts);
  this.assignRetryImmediatelyMaxAttempts = conf.getInt(ASSIGN_RETRY_IMMEDIATELY_MAX_ATTEMPTS,
    DEFAULT_ASSIGN_RETRY_IMMEDIATELY_MAX_ATTEMPTS);

  // The region-in-transition chore is always created.
  this.ritChore = new RegionInTransitionChore(
    conf.getInt(RIT_CHORE_INTERVAL_MSEC_CONF_KEY, DEFAULT_RIT_CHORE_INTERVAL_MSEC));

  // The dead-server metric chore is created only for a positive interval; otherwise the field
  // is left null.
  final int deadRegionChoreInterval = conf.getInt(DEAD_REGION_METRIC_CHORE_INTERVAL_MSEC_CONF_KEY,
    DEFAULT_DEAD_REGION_METRIC_CHORE_INTERVAL_MSEC);
  this.deadMetricChore = deadRegionChoreInterval > 0
    ? new DeadServerMetricRegionChore(deadRegionChoreInterval)
    : null;

  this.minVersionToMoveSysTables =
    conf.get(MIN_VERSION_MOVE_SYS_TABLES_CONFIG, DEFAULT_MIN_VERSION_MOVE_SYS_TABLES_CONFIG);

  // Forced region retainment settings.
  this.forceRegionRetainment =
    conf.getBoolean(FORCE_REGION_RETAINMENT, DEFAULT_FORCE_REGION_RETAINMENT);
  this.forceRegionRetainmentWaitInterval = conf.getLong(FORCE_REGION_RETAINMENT_WAIT_INTERVAL,
    DEFAULT_FORCE_REGION_RETAINMENT_WAIT_INTERVAL);
  this.forceRegionRetainmentRetries =
    conf.getInt(FORCE_REGION_RETAINMENT_RETRIES, DEFAULT_FORCE_REGION_RETAINMENT_RETRIES);
}
| <span class="sourceLineNo">280</span><a name="line.280"></a> |
| <span class="sourceLineNo">281</span> private void mirrorMetaLocations() throws IOException, KeeperException {<a name="line.281"></a> |
| <span class="sourceLineNo">282</span> // For compatibility, mirror the meta region state to zookeeper<a name="line.282"></a> |
| <span class="sourceLineNo">283</span> // And we still need to use zookeeper to publish the meta region locations to region<a name="line.283"></a> |
| <span class="sourceLineNo">284</span> // server, so they can serve as ClientMetaService<a name="line.284"></a> |
| <span class="sourceLineNo">285</span> ZKWatcher zk = master.getZooKeeper();<a name="line.285"></a> |
| <span class="sourceLineNo">286</span> if (zk == null || !zk.getRecoverableZooKeeper().getState().isAlive()) {<a name="line.286"></a> |
| <span class="sourceLineNo">287</span> // this is possible in tests, we do not provide a zk watcher or the zk watcher has been closed<a name="line.287"></a> |
| <span class="sourceLineNo">288</span> return;<a name="line.288"></a> |
| <span class="sourceLineNo">289</span> }<a name="line.289"></a> |
| <span class="sourceLineNo">290</span> Collection<RegionStateNode> metaStates = regionStates.getRegionStateNodes();<a name="line.290"></a> |
| <span class="sourceLineNo">291</span> for (RegionStateNode metaState : metaStates) {<a name="line.291"></a> |
| <span class="sourceLineNo">292</span> MetaTableLocator.setMetaLocation(zk, metaState.getRegionLocation(),<a name="line.292"></a> |
| <span class="sourceLineNo">293</span> metaState.getRegionInfo().getReplicaId(), metaState.getState());<a name="line.293"></a> |
| <span class="sourceLineNo">294</span> }<a name="line.294"></a> |
| <span class="sourceLineNo">295</span> int replicaCount = metaStates.size();<a name="line.295"></a> |
| <span class="sourceLineNo">296</span> // remove extra mirror locations<a name="line.296"></a> |
| <span class="sourceLineNo">297</span> for (String znode : zk.getMetaReplicaNodes()) {<a name="line.297"></a> |
| <span class="sourceLineNo">298</span> int replicaId = zk.getZNodePaths().getMetaReplicaIdFromZNode(znode);<a name="line.298"></a> |
| <span class="sourceLineNo">299</span> if (replicaId >= replicaCount) {<a name="line.299"></a> |
| <span class="sourceLineNo">300</span> MetaTableLocator.deleteMetaLocation(zk, replicaId);<a name="line.300"></a> |
| <span class="sourceLineNo">301</span> }<a name="line.301"></a> |
| <span class="sourceLineNo">302</span> }<a name="line.302"></a> |
| <span class="sourceLineNo">303</span> }<a name="line.303"></a> |
| <span class="sourceLineNo">304</span><a name="line.304"></a> |
| <span class="sourceLineNo">305</span> public void start() throws IOException, KeeperException {<a name="line.305"></a> |
| <span class="sourceLineNo">306</span> if (!running.compareAndSet(false, true)) {<a name="line.306"></a> |
| <span class="sourceLineNo">307</span> return;<a name="line.307"></a> |
| <span class="sourceLineNo">308</span> }<a name="line.308"></a> |
| <span class="sourceLineNo">309</span><a name="line.309"></a> |
| <span class="sourceLineNo">310</span> LOG.trace("Starting assignment manager");<a name="line.310"></a> |
| <span class="sourceLineNo">311</span><a name="line.311"></a> |
| <span class="sourceLineNo">312</span> // Start the Assignment Thread<a name="line.312"></a> |
| <span class="sourceLineNo">313</span> startAssignmentThread();<a name="line.313"></a> |
| <span class="sourceLineNo">314</span> // load meta region states.<a name="line.314"></a> |
| <span class="sourceLineNo">315</span> // here we are still in the early steps of active master startup. There is only one thread(us)<a name="line.315"></a> |
| <span class="sourceLineNo">316</span> // can access AssignmentManager and create region node, so here we do not need to lock the<a name="line.316"></a> |
| <span class="sourceLineNo">317</span> // region node.<a name="line.317"></a> |
| <span class="sourceLineNo">318</span> try (ResultScanner scanner =<a name="line.318"></a> |
| <span class="sourceLineNo">319</span> masterRegion.getScanner(new Scan().addFamily(HConstants.CATALOG_FAMILY))) {<a name="line.319"></a> |
| <span class="sourceLineNo">320</span> for (;;) {<a name="line.320"></a> |
| <span class="sourceLineNo">321</span> Result result = scanner.next();<a name="line.321"></a> |
| <span class="sourceLineNo">322</span> if (result == null) {<a name="line.322"></a> |
| <span class="sourceLineNo">323</span> break;<a name="line.323"></a> |
| <span class="sourceLineNo">324</span> }<a name="line.324"></a> |
| <span class="sourceLineNo">325</span> RegionStateStore<a name="line.325"></a> |
| <span class="sourceLineNo">326</span> .visitMetaEntry((r, regionInfo, state, regionLocation, lastHost, openSeqNum) -> {<a name="line.326"></a> |
| <span class="sourceLineNo">327</span> RegionStateNode regionNode = regionStates.getOrCreateRegionStateNode(regionInfo);<a name="line.327"></a> |
| <span class="sourceLineNo">328</span> regionNode.setState(state);<a name="line.328"></a> |
| <span class="sourceLineNo">329</span> regionNode.setLastHost(lastHost);<a name="line.329"></a> |
| <span class="sourceLineNo">330</span> regionNode.setRegionLocation(regionLocation);<a name="line.330"></a> |
| <span class="sourceLineNo">331</span> regionNode.setOpenSeqNum(openSeqNum);<a name="line.331"></a> |
| <span class="sourceLineNo">332</span> if (regionNode.getProcedure() != null) {<a name="line.332"></a> |
| <span class="sourceLineNo">333</span> regionNode.getProcedure().stateLoaded(this, regionNode);<a name="line.333"></a> |
| <span class="sourceLineNo">334</span> }<a name="line.334"></a> |
| <span class="sourceLineNo">335</span> if (regionLocation != null) {<a name="line.335"></a> |
| <span class="sourceLineNo">336</span> // TODO: this could lead to some orphan server state nodes, as it is possible that the<a name="line.336"></a> |
| <span class="sourceLineNo">337</span> // region server is already dead and its SCP has already finished but we have<a name="line.337"></a> |
| <span class="sourceLineNo">338</span> // persisted an opening state on this region server. Finally the TRSP will assign the<a name="line.338"></a> |
| <span class="sourceLineNo">339</span> // region to another region server, so it will not cause critical problems, just waste<a name="line.339"></a> |
| <span class="sourceLineNo">340</span> // some memory as no one will try to cleanup these orphan server state nodes.<a name="line.340"></a> |
| <span class="sourceLineNo">341</span> regionStates.createServer(regionLocation);<a name="line.341"></a> |
| <span class="sourceLineNo">342</span> regionStates.addRegionToServer(regionNode);<a name="line.342"></a> |
| <span class="sourceLineNo">343</span> }<a name="line.343"></a> |
| <span class="sourceLineNo">344</span> if (RegionReplicaUtil.isDefaultReplica(regionInfo.getReplicaId())) {<a name="line.344"></a> |
| <span class="sourceLineNo">345</span> setMetaAssigned(regionInfo, state == State.OPEN);<a name="line.345"></a> |
| <span class="sourceLineNo">346</span> }<a name="line.346"></a> |
| <span class="sourceLineNo">347</span> LOG.debug("Loaded hbase:meta {}", regionNode);<a name="line.347"></a> |
| <span class="sourceLineNo">348</span> }, result);<a name="line.348"></a> |
| <span class="sourceLineNo">349</span> }<a name="line.349"></a> |
| <span class="sourceLineNo">350</span> }<a name="line.350"></a> |
| <span class="sourceLineNo">351</span> mirrorMetaLocations();<a name="line.351"></a> |
| <span class="sourceLineNo">352</span> }<a name="line.352"></a> |
| <span class="sourceLineNo">353</span><a name="line.353"></a> |
| <span class="sourceLineNo">354</span> /**<a name="line.354"></a> |
| <span class="sourceLineNo">355</span> * Create RegionStateNode based on the TRSP list, and attach the TRSP to the RegionStateNode.<a name="line.355"></a> |
| <span class="sourceLineNo">356</span> * <p><a name="line.356"></a> |
| <span class="sourceLineNo">357</span> * This is used to restore the RIT region list, so we do not need to restore it in the loadingMeta<a name="line.357"></a> |
| <span class="sourceLineNo">358</span> * method below. And it is also very important as now before submitting a TRSP, we need to attach<a name="line.358"></a> |
| <span class="sourceLineNo">359</span> * it to the RegionStateNode, which acts like a guard, so we need to restore this information at<a name="line.359"></a> |
| <span class="sourceLineNo">360</span> * the very beginning, before we start processing any procedures.<a name="line.360"></a> |
| <span class="sourceLineNo">361</span> */<a name="line.361"></a> |
| <span class="sourceLineNo">362</span> public void setupRIT(List<TransitRegionStateProcedure> procs) {<a name="line.362"></a> |
| <span class="sourceLineNo">363</span> procs.forEach(proc -> {<a name="line.363"></a> |
| <span class="sourceLineNo">364</span> RegionInfo regionInfo = proc.getRegion();<a name="line.364"></a> |
| <span class="sourceLineNo">365</span> RegionStateNode regionNode = regionStates.getOrCreateRegionStateNode(regionInfo);<a name="line.365"></a> |
| <span class="sourceLineNo">366</span> TransitRegionStateProcedure existingProc = regionNode.getProcedure();<a name="line.366"></a> |
| <span class="sourceLineNo">367</span> if (existingProc != null) {<a name="line.367"></a> |
| <span class="sourceLineNo">368</span> // This is possible, as we will detach the procedure from the RSN before we<a name="line.368"></a> |
| <span class="sourceLineNo">369</span> // actually finish the procedure. This is because that, we will detach the TRSP from the RSN<a name="line.369"></a> |
| <span class="sourceLineNo">370</span> // during execution, at that time, the procedure has not been marked as done in the pv2<a name="line.370"></a> |
| <span class="sourceLineNo">371</span> // framework yet, so it is possible that we schedule a new TRSP immediately and when<a name="line.371"></a> |
| <span class="sourceLineNo">372</span> // arriving here, we will find out that there are multiple TRSPs for the region. But we can<a name="line.372"></a> |
| <span class="sourceLineNo">373</span> // make sure that, only the last one can take the charge, the previous ones should have all<a name="line.373"></a> |
| <span class="sourceLineNo">374</span> // been finished already. So here we will compare the proc id, the greater one will win.<a name="line.374"></a> |
| <span class="sourceLineNo">375</span> if (existingProc.getProcId() < proc.getProcId()) {<a name="line.375"></a> |
| <span class="sourceLineNo">376</span> // the new one wins, unset and set it to the new one below<a name="line.376"></a> |
| <span class="sourceLineNo">377</span> regionNode.unsetProcedure(existingProc);<a name="line.377"></a> |
| <span class="sourceLineNo">378</span> } else {<a name="line.378"></a> |
| <span class="sourceLineNo">379</span> // the old one wins, skip<a name="line.379"></a> |
| <span class="sourceLineNo">380</span> return;<a name="line.380"></a> |
| <span class="sourceLineNo">381</span> }<a name="line.381"></a> |
| <span class="sourceLineNo">382</span> }<a name="line.382"></a> |
| <span class="sourceLineNo">383</span> LOG.info("Attach {} to {} to restore RIT", proc, regionNode);<a name="line.383"></a> |
| <span class="sourceLineNo">384</span> regionNode.setProcedure(proc);<a name="line.384"></a> |
| <span class="sourceLineNo">385</span> });<a name="line.385"></a> |
| <span class="sourceLineNo">386</span> }<a name="line.386"></a> |
| <span class="sourceLineNo">387</span><a name="line.387"></a> |
| <span class="sourceLineNo">388</span> public void stop() {<a name="line.388"></a> |
| <span class="sourceLineNo">389</span> if (!running.compareAndSet(true, false)) {<a name="line.389"></a> |
| <span class="sourceLineNo">390</span> return;<a name="line.390"></a> |
| <span class="sourceLineNo">391</span> }<a name="line.391"></a> |
| <span class="sourceLineNo">392</span><a name="line.392"></a> |
| <span class="sourceLineNo">393</span> LOG.info("Stopping assignment manager");<a name="line.393"></a> |
| <span class="sourceLineNo">394</span><a name="line.394"></a> |
| <span class="sourceLineNo">395</span> // The AM is started before the procedure executor,<a name="line.395"></a> |
| <span class="sourceLineNo">396</span> // but the actual work will be loaded/submitted only once we have the executor<a name="line.396"></a> |
| <span class="sourceLineNo">397</span> final boolean hasProcExecutor = master.getMasterProcedureExecutor() != null;<a name="line.397"></a> |
| <span class="sourceLineNo">398</span><a name="line.398"></a> |
| <span class="sourceLineNo">399</span> // Remove the RIT chore<a name="line.399"></a> |
| <span class="sourceLineNo">400</span> if (hasProcExecutor) {<a name="line.400"></a> |
| <span class="sourceLineNo">401</span> master.getMasterProcedureExecutor().removeChore(this.ritChore);<a name="line.401"></a> |
| <span class="sourceLineNo">402</span> if (this.deadMetricChore != null) {<a name="line.402"></a> |
| <span class="sourceLineNo">403</span> master.getMasterProcedureExecutor().removeChore(this.deadMetricChore);<a name="line.403"></a> |
| <span class="sourceLineNo">404</span> }<a name="line.404"></a> |
| <span class="sourceLineNo">405</span> }<a name="line.405"></a> |
| <span class="sourceLineNo">406</span><a name="line.406"></a> |
| <span class="sourceLineNo">407</span> // Stop the Assignment Thread<a name="line.407"></a> |
| <span class="sourceLineNo">408</span> stopAssignmentThread();<a name="line.408"></a> |
| <span class="sourceLineNo">409</span><a name="line.409"></a> |
| <span class="sourceLineNo">410</span> // Stop the RegionStateStore<a name="line.410"></a> |
| <span class="sourceLineNo">411</span> regionStates.clear();<a name="line.411"></a> |
| <span class="sourceLineNo">412</span><a name="line.412"></a> |
| <span class="sourceLineNo">413</span> // Update meta events (for testing)<a name="line.413"></a> |
| <span class="sourceLineNo">414</span> if (hasProcExecutor) {<a name="line.414"></a> |
| <span class="sourceLineNo">415</span> metaLoadEvent.suspend();<a name="line.415"></a> |
| <span class="sourceLineNo">416</span> for (RegionInfo hri : getMetaRegionSet()) {<a name="line.416"></a> |
| <span class="sourceLineNo">417</span> setMetaAssigned(hri, false);<a name="line.417"></a> |
| <span class="sourceLineNo">418</span> }<a name="line.418"></a> |
| <span class="sourceLineNo">419</span> }<a name="line.419"></a> |
| <span class="sourceLineNo">420</span> }<a name="line.420"></a> |
| <span class="sourceLineNo">421</span><a name="line.421"></a> |
| <span class="sourceLineNo">422</span> public boolean isRunning() {<a name="line.422"></a> |
| <span class="sourceLineNo">423</span> return running.get();<a name="line.423"></a> |
| <span class="sourceLineNo">424</span> }<a name="line.424"></a> |
| <span class="sourceLineNo">425</span><a name="line.425"></a> |
| <span class="sourceLineNo">426</span> public Configuration getConfiguration() {<a name="line.426"></a> |
| <span class="sourceLineNo">427</span> return master.getConfiguration();<a name="line.427"></a> |
| <span class="sourceLineNo">428</span> }<a name="line.428"></a> |
| <span class="sourceLineNo">429</span><a name="line.429"></a> |
| <span class="sourceLineNo">430</span> public MetricsAssignmentManager getAssignmentManagerMetrics() {<a name="line.430"></a> |
| <span class="sourceLineNo">431</span> return metrics;<a name="line.431"></a> |
| <span class="sourceLineNo">432</span> }<a name="line.432"></a> |
| <span class="sourceLineNo">433</span><a name="line.433"></a> |
| <span class="sourceLineNo">434</span> private LoadBalancer getBalancer() {<a name="line.434"></a> |
| <span class="sourceLineNo">435</span> return master.getLoadBalancer();<a name="line.435"></a> |
| <span class="sourceLineNo">436</span> }<a name="line.436"></a> |
| <span class="sourceLineNo">437</span><a name="line.437"></a> |
| <span class="sourceLineNo">438</span> private FavoredNodesPromoter getFavoredNodePromoter() {<a name="line.438"></a> |
| <span class="sourceLineNo">439</span> return (FavoredNodesPromoter) ((RSGroupBasedLoadBalancer) master.getLoadBalancer())<a name="line.439"></a> |
| <span class="sourceLineNo">440</span> .getInternalBalancer();<a name="line.440"></a> |
| <span class="sourceLineNo">441</span> }<a name="line.441"></a> |
| <span class="sourceLineNo">442</span><a name="line.442"></a> |
| <span class="sourceLineNo">443</span> private MasterProcedureEnv getProcedureEnvironment() {<a name="line.443"></a> |
| <span class="sourceLineNo">444</span> return master.getMasterProcedureExecutor().getEnvironment();<a name="line.444"></a> |
| <span class="sourceLineNo">445</span> }<a name="line.445"></a> |
| <span class="sourceLineNo">446</span><a name="line.446"></a> |
| <span class="sourceLineNo">447</span> private MasterProcedureScheduler getProcedureScheduler() {<a name="line.447"></a> |
| <span class="sourceLineNo">448</span> return getProcedureEnvironment().getProcedureScheduler();<a name="line.448"></a> |
| <span class="sourceLineNo">449</span> }<a name="line.449"></a> |
| <span class="sourceLineNo">450</span><a name="line.450"></a> |
| <span class="sourceLineNo">451</span> int getAssignMaxAttempts() {<a name="line.451"></a> |
| <span class="sourceLineNo">452</span> return assignMaxAttempts;<a name="line.452"></a> |
| <span class="sourceLineNo">453</span> }<a name="line.453"></a> |
| <span class="sourceLineNo">454</span><a name="line.454"></a> |
| <span class="sourceLineNo">455</span> public boolean isForceRegionRetainment() {<a name="line.455"></a> |
| <span class="sourceLineNo">456</span> return forceRegionRetainment;<a name="line.456"></a> |
| <span class="sourceLineNo">457</span> }<a name="line.457"></a> |
| <span class="sourceLineNo">458</span><a name="line.458"></a> |
| <span class="sourceLineNo">459</span> public long getForceRegionRetainmentWaitInterval() {<a name="line.459"></a> |
| <span class="sourceLineNo">460</span> return forceRegionRetainmentWaitInterval;<a name="line.460"></a> |
| <span class="sourceLineNo">461</span> }<a name="line.461"></a> |
| <span class="sourceLineNo">462</span><a name="line.462"></a> |
| <span class="sourceLineNo">463</span> public int getForceRegionRetainmentRetries() {<a name="line.463"></a> |
| <span class="sourceLineNo">464</span> return forceRegionRetainmentRetries;<a name="line.464"></a> |
| <span class="sourceLineNo">465</span> }<a name="line.465"></a> |
| <span class="sourceLineNo">466</span><a name="line.466"></a> |
| <span class="sourceLineNo">467</span> int getAssignRetryImmediatelyMaxAttempts() {<a name="line.467"></a> |
| <span class="sourceLineNo">468</span> return assignRetryImmediatelyMaxAttempts;<a name="line.468"></a> |
| <span class="sourceLineNo">469</span> }<a name="line.469"></a> |
| <span class="sourceLineNo">470</span><a name="line.470"></a> |
| <span class="sourceLineNo">471</span> public RegionStates getRegionStates() {<a name="line.471"></a> |
| <span class="sourceLineNo">472</span> return regionStates;<a name="line.472"></a> |
| <span class="sourceLineNo">473</span> }<a name="line.473"></a> |
| <span class="sourceLineNo">474</span><a name="line.474"></a> |
| <span class="sourceLineNo">475</span> /**<a name="line.475"></a> |
| <span class="sourceLineNo">476</span> * Returns the regions hosted by the specified server.<a name="line.476"></a> |
| <span class="sourceLineNo">477</span> * <p/><a name="line.477"></a> |
| <span class="sourceLineNo">478</span> * Notice that, for SCP, after we submit the SCP, no one can change the region list for the<a name="line.478"></a> |
| <span class="sourceLineNo">479</span> * ServerStateNode so we do not need any locks here. And for other usage, this can only give you a<a name="line.479"></a> |
| <span class="sourceLineNo">480</span> * snapshot of the current region list for this server, which means, right after you get the<a name="line.480"></a> |
| <span class="sourceLineNo">481</span> * region list, new regions may be moved to this server or some regions may be moved out from this<a name="line.481"></a> |
| <span class="sourceLineNo">482</span> * server, so you should not use it critically if you need strong consistency.<a name="line.482"></a> |
| <span class="sourceLineNo">483</span> */<a name="line.483"></a> |
| <span class="sourceLineNo">484</span> public List<RegionInfo> getRegionsOnServer(ServerName serverName) {<a name="line.484"></a> |
| <span class="sourceLineNo">485</span> ServerStateNode serverInfo = regionStates.getServerNode(serverName);<a name="line.485"></a> |
| <span class="sourceLineNo">486</span> if (serverInfo == null) {<a name="line.486"></a> |
| <span class="sourceLineNo">487</span> return Collections.emptyList();<a name="line.487"></a> |
| <span class="sourceLineNo">488</span> }<a name="line.488"></a> |
| <span class="sourceLineNo">489</span> return serverInfo.getRegionInfoList();<a name="line.489"></a> |
| <span class="sourceLineNo">490</span> }<a name="line.490"></a> |
| <span class="sourceLineNo">491</span><a name="line.491"></a> |
| <span class="sourceLineNo">492</span> private RegionInfo getRegionInfo(RegionStateNode rsn) {<a name="line.492"></a> |
| <span class="sourceLineNo">493</span> if (rsn.isSplit() && !rsn.getRegionInfo().isSplit()) {<a name="line.493"></a> |
| <span class="sourceLineNo">494</span> // see the comments in markRegionAsSplit on why we need to do this converting.<a name="line.494"></a> |
| <span class="sourceLineNo">495</span> return RegionInfoBuilder.newBuilder(rsn.getRegionInfo()).setSplit(true).setOffline(true)<a name="line.495"></a> |
| <span class="sourceLineNo">496</span> .build();<a name="line.496"></a> |
| <span class="sourceLineNo">497</span> } else {<a name="line.497"></a> |
| <span class="sourceLineNo">498</span> return rsn.getRegionInfo();<a name="line.498"></a> |
| <span class="sourceLineNo">499</span> }<a name="line.499"></a> |
| <span class="sourceLineNo">500</span> }<a name="line.500"></a> |
| <span class="sourceLineNo">501</span><a name="line.501"></a> |
| <span class="sourceLineNo">502</span> private Stream<RegionStateNode> getRegionStateNodes(TableName tableName,<a name="line.502"></a> |
| <span class="sourceLineNo">503</span> boolean excludeOfflinedSplitParents) {<a name="line.503"></a> |
| <span class="sourceLineNo">504</span> Stream<RegionStateNode> stream = regionStates.getTableRegionStateNodes(tableName).stream();<a name="line.504"></a> |
| <span class="sourceLineNo">505</span> if (excludeOfflinedSplitParents) {<a name="line.505"></a> |
| <span class="sourceLineNo">506</span> return stream.filter(rsn -> !rsn.isSplit());<a name="line.506"></a> |
| <span class="sourceLineNo">507</span> } else {<a name="line.507"></a> |
| <span class="sourceLineNo">508</span> return stream;<a name="line.508"></a> |
| <span class="sourceLineNo">509</span> }<a name="line.509"></a> |
| <span class="sourceLineNo">510</span> }<a name="line.510"></a> |
| <span class="sourceLineNo">511</span><a name="line.511"></a> |
| <span class="sourceLineNo">512</span> public List<RegionInfo> getTableRegions(TableName tableName,<a name="line.512"></a> |
| <span class="sourceLineNo">513</span> boolean excludeOfflinedSplitParents) {<a name="line.513"></a> |
| <span class="sourceLineNo">514</span> return getRegionStateNodes(tableName, excludeOfflinedSplitParents).map(this::getRegionInfo)<a name="line.514"></a> |
| <span class="sourceLineNo">515</span> .collect(Collectors.toList());<a name="line.515"></a> |
| <span class="sourceLineNo">516</span> }<a name="line.516"></a> |
| <span class="sourceLineNo">517</span><a name="line.517"></a> |
| <span class="sourceLineNo">518</span> public List<Pair<RegionInfo, ServerName>> getTableRegionsAndLocations(TableName tableName,<a name="line.518"></a> |
| <span class="sourceLineNo">519</span> boolean excludeOfflinedSplitParents) {<a name="line.519"></a> |
| <span class="sourceLineNo">520</span> return getRegionStateNodes(tableName, excludeOfflinedSplitParents)<a name="line.520"></a> |
| <span class="sourceLineNo">521</span> .map(rsn -> Pair.newPair(getRegionInfo(rsn), rsn.getRegionLocation()))<a name="line.521"></a> |
| <span class="sourceLineNo">522</span> .collect(Collectors.toList());<a name="line.522"></a> |
| <span class="sourceLineNo">523</span> }<a name="line.523"></a> |
| <span class="sourceLineNo">524</span><a name="line.524"></a> |
| <span class="sourceLineNo">525</span> public RegionStateStore getRegionStateStore() {<a name="line.525"></a> |
| <span class="sourceLineNo">526</span> return regionStateStore;<a name="line.526"></a> |
| <span class="sourceLineNo">527</span> }<a name="line.527"></a> |
| <span class="sourceLineNo">528</span><a name="line.528"></a> |
| <span class="sourceLineNo">529</span> public List<ServerName> getFavoredNodes(final RegionInfo regionInfo) {<a name="line.529"></a> |
| <span class="sourceLineNo">530</span> return this.shouldAssignRegionsWithFavoredNodes<a name="line.530"></a> |
| <span class="sourceLineNo">531</span> ? getFavoredNodePromoter().getFavoredNodes(regionInfo)<a name="line.531"></a> |
| <span class="sourceLineNo">532</span> : ServerName.EMPTY_SERVER_LIST;<a name="line.532"></a> |
| <span class="sourceLineNo">533</span> }<a name="line.533"></a> |
| <span class="sourceLineNo">534</span><a name="line.534"></a> |
| <span class="sourceLineNo">535</span> // ============================================================================================<a name="line.535"></a> |
| <span class="sourceLineNo">536</span> // Table State Manager helpers<a name="line.536"></a> |
| <span class="sourceLineNo">537</span> // ============================================================================================<a name="line.537"></a> |
| <span class="sourceLineNo">538</span> private TableStateManager getTableStateManager() {<a name="line.538"></a> |
| <span class="sourceLineNo">539</span> return master.getTableStateManager();<a name="line.539"></a> |
| <span class="sourceLineNo">540</span> }<a name="line.540"></a> |
| <span class="sourceLineNo">541</span><a name="line.541"></a> |
| <span class="sourceLineNo">542</span> private boolean isTableEnabled(final TableName tableName) {<a name="line.542"></a> |
| <span class="sourceLineNo">543</span> return getTableStateManager().isTableState(tableName, TableState.State.ENABLED);<a name="line.543"></a> |
| <span class="sourceLineNo">544</span> }<a name="line.544"></a> |
| <span class="sourceLineNo">545</span><a name="line.545"></a> |
| <span class="sourceLineNo">546</span> private boolean isTableDisabled(final TableName tableName) {<a name="line.546"></a> |
| <span class="sourceLineNo">547</span> return getTableStateManager().isTableState(tableName, TableState.State.DISABLED,<a name="line.547"></a> |
| <span class="sourceLineNo">548</span> TableState.State.DISABLING);<a name="line.548"></a> |
| <span class="sourceLineNo">549</span> }<a name="line.549"></a> |
| <span class="sourceLineNo">550</span><a name="line.550"></a> |
| <span class="sourceLineNo">551</span> // ============================================================================================<a name="line.551"></a> |
| <span class="sourceLineNo">552</span> // META Helpers<a name="line.552"></a> |
| <span class="sourceLineNo">553</span> // ============================================================================================<a name="line.553"></a> |
| <span class="sourceLineNo">554</span> private boolean isMetaRegion(final RegionInfo regionInfo) {<a name="line.554"></a> |
| <span class="sourceLineNo">555</span> return regionInfo.isMetaRegion();<a name="line.555"></a> |
| <span class="sourceLineNo">556</span> }<a name="line.556"></a> |
| <span class="sourceLineNo">557</span><a name="line.557"></a> |
| <span class="sourceLineNo">558</span> public boolean isMetaRegion(final byte[] regionName) {<a name="line.558"></a> |
| <span class="sourceLineNo">559</span> return getMetaRegionFromName(regionName) != null;<a name="line.559"></a> |
| <span class="sourceLineNo">560</span> }<a name="line.560"></a> |
| <span class="sourceLineNo">561</span><a name="line.561"></a> |
| <span class="sourceLineNo">562</span> public RegionInfo getMetaRegionFromName(final byte[] regionName) {<a name="line.562"></a> |
| <span class="sourceLineNo">563</span> for (RegionInfo hri : getMetaRegionSet()) {<a name="line.563"></a> |
| <span class="sourceLineNo">564</span> if (Bytes.equals(hri.getRegionName(), regionName)) {<a name="line.564"></a> |
| <span class="sourceLineNo">565</span> return hri;<a name="line.565"></a> |
| <span class="sourceLineNo">566</span> }<a name="line.566"></a> |
| <span class="sourceLineNo">567</span> }<a name="line.567"></a> |
| <span class="sourceLineNo">568</span> return null;<a name="line.568"></a> |
| <span class="sourceLineNo">569</span> }<a name="line.569"></a> |
| <span class="sourceLineNo">570</span><a name="line.570"></a> |
| <span class="sourceLineNo">571</span> public boolean isCarryingMeta(final ServerName serverName) {<a name="line.571"></a> |
| <span class="sourceLineNo">572</span> // TODO: handle multiple meta<a name="line.572"></a> |
| <span class="sourceLineNo">573</span> return isCarryingRegion(serverName, RegionInfoBuilder.FIRST_META_REGIONINFO);<a name="line.573"></a> |
| <span class="sourceLineNo">574</span> }<a name="line.574"></a> |
| <span class="sourceLineNo">575</span><a name="line.575"></a> |
| <span class="sourceLineNo">576</span> private boolean isCarryingRegion(final ServerName serverName, final RegionInfo regionInfo) {<a name="line.576"></a> |
| <span class="sourceLineNo">577</span> // TODO: check for state?<a name="line.577"></a> |
| <span class="sourceLineNo">578</span> final RegionStateNode node = regionStates.getRegionStateNode(regionInfo);<a name="line.578"></a> |
| <span class="sourceLineNo">579</span> return (node != null && serverName.equals(node.getRegionLocation()));<a name="line.579"></a> |
| <span class="sourceLineNo">580</span> }<a name="line.580"></a> |
| <span class="sourceLineNo">581</span><a name="line.581"></a> |
| <span class="sourceLineNo">582</span> private RegionInfo getMetaForRegion(final RegionInfo regionInfo) {<a name="line.582"></a> |
| <span class="sourceLineNo">583</span> // if (regionInfo.isMetaRegion()) return regionInfo;<a name="line.583"></a> |
| <span class="sourceLineNo">584</span> // TODO: handle multiple meta. if the region provided is not meta lookup<a name="line.584"></a> |
| <span class="sourceLineNo">585</span> // which meta the region belongs to.<a name="line.585"></a> |
| <span class="sourceLineNo">586</span> return RegionInfoBuilder.FIRST_META_REGIONINFO;<a name="line.586"></a> |
| <span class="sourceLineNo">587</span> }<a name="line.587"></a> |
| <span class="sourceLineNo">588</span><a name="line.588"></a> |
// The set of meta regions known to this class: a singleton holding only the first meta region.
// TODO: handle multiple meta.
private static final Set<RegionInfo> META_REGION_SET =
  Collections.singleton(RegionInfoBuilder.FIRST_META_REGIONINFO);
| <span class="sourceLineNo">592</span><a name="line.592"></a> |
| <span class="sourceLineNo">593</span> public Set<RegionInfo> getMetaRegionSet() {<a name="line.593"></a> |
| <span class="sourceLineNo">594</span> return META_REGION_SET;<a name="line.594"></a> |
| <span class="sourceLineNo">595</span> }<a name="line.595"></a> |
| <span class="sourceLineNo">596</span><a name="line.596"></a> |
| <span class="sourceLineNo">597</span> // ============================================================================================<a name="line.597"></a> |
| <span class="sourceLineNo">598</span> // META Event(s) helpers<a name="line.598"></a> |
| <span class="sourceLineNo">599</span> // ============================================================================================<a name="line.599"></a> |
| <span class="sourceLineNo">600</span> /**<a name="line.600"></a> |
| <span class="sourceLineNo">601</span> * Returns whether the meta assign event is ready. Note that this only means the meta region is<a name="line.601"></a> |
| <span class="sourceLineNo">602</span> * available on a RS; the AM may still be loading the region states from meta. So usually you<a name="line.602"></a> |
| <span class="sourceLineNo">603</span> * need to check {@link #isMetaLoaded()} before calling this method, unless your code can only<a name="line.603"></a> |
| <span class="sourceLineNo">604</span> * run after the AM has built the region states.<a name="line.604"></a> |
| <span class="sourceLineNo">605</span> * @see #isMetaLoaded()<a name="line.605"></a> |
| <span class="sourceLineNo">606</span> */<a name="line.606"></a> |
| <span class="sourceLineNo">607</span> public boolean isMetaAssigned() {<a name="line.607"></a> |
| <span class="sourceLineNo">608</span> return metaAssignEvent.isReady();<a name="line.608"></a> |
| <span class="sourceLineNo">609</span> }<a name="line.609"></a> |
| <span class="sourceLineNo">610</span><a name="line.610"></a> |
| <span class="sourceLineNo">611</span> public boolean isMetaRegionInTransition() {<a name="line.611"></a> |
| <span class="sourceLineNo">612</span> return !isMetaAssigned();<a name="line.612"></a> |
| <span class="sourceLineNo">613</span> }<a name="line.613"></a> |
| <span class="sourceLineNo">614</span><a name="line.614"></a> |
| <span class="sourceLineNo">615</span> /**<a name="line.615"></a> |
| <span class="sourceLineNo">616</span> * Notice that this event does not mean the AM has already finished region state rebuilding. See<a name="line.616"></a> |
| <span class="sourceLineNo">617</span> * the comment of {@link #isMetaAssigned()} for more details.<a name="line.617"></a> |
| <span class="sourceLineNo">618</span> * @see #isMetaAssigned()<a name="line.618"></a> |
| <span class="sourceLineNo">619</span> */<a name="line.619"></a> |
| <span class="sourceLineNo">620</span> public boolean waitMetaAssigned(Procedure<?> proc, RegionInfo regionInfo) {<a name="line.620"></a> |
| <span class="sourceLineNo">621</span> return getMetaAssignEvent(getMetaForRegion(regionInfo)).suspendIfNotReady(proc);<a name="line.621"></a> |
| <span class="sourceLineNo">622</span> }<a name="line.622"></a> |
| <span class="sourceLineNo">623</span><a name="line.623"></a> |
| <span class="sourceLineNo">624</span> private void setMetaAssigned(RegionInfo metaRegionInfo, boolean assigned) {<a name="line.624"></a> |
| <span class="sourceLineNo">625</span> assert isMetaRegion(metaRegionInfo) : "unexpected non-meta region " + metaRegionInfo;<a name="line.625"></a> |
| <span class="sourceLineNo">626</span> ProcedureEvent<?> event = getMetaAssignEvent(metaRegionInfo);<a name="line.626"></a> |
| <span class="sourceLineNo">627</span> if (!assigned) {<a name="line.627"></a> |
| <span class="sourceLineNo">628</span> event.suspend();<a name="line.628"></a> |
| <span class="sourceLineNo">629</span> return;<a name="line.629"></a> |
| <span class="sourceLineNo">630</span> }<a name="line.630"></a> |
| <span class="sourceLineNo">631</span> event.wake(getProcedureScheduler());<a name="line.631"></a> |
| <span class="sourceLineNo">632</span> }<a name="line.632"></a> |
| <span class="sourceLineNo">633</span><a name="line.633"></a> |
| <span class="sourceLineNo">634</span> private ProcedureEvent<?> getMetaAssignEvent(RegionInfo metaRegionInfo) {<a name="line.634"></a> |
| <span class="sourceLineNo">635</span> assert isMetaRegion(metaRegionInfo) : "unexpected non-meta region " + metaRegionInfo;<a name="line.635"></a> |
| <span class="sourceLineNo">636</span> // One shared event is returned whichever meta region is passed. TODO: handle multiple meta.<a name="line.636"></a> |
| <span class="sourceLineNo">637</span> return metaAssignEvent;<a name="line.637"></a> |
| <span class="sourceLineNo">638</span> }<a name="line.638"></a> |
| <span class="sourceLineNo">639</span><a name="line.639"></a> |
| <span class="sourceLineNo">640</span> /**<a name="line.640"></a> |
| <span class="sourceLineNo">641</span> * Wait until the AM finishes the meta loading, i.e., the region states rebuilding.<a name="line.641"></a> |
| <span class="sourceLineNo">642</span> * @see #isMetaLoaded()<a name="line.642"></a> |
| <span class="sourceLineNo">643</span> * @see #waitMetaAssigned(Procedure, RegionInfo)<a name="line.643"></a> |
| <span class="sourceLineNo">644</span> */<a name="line.644"></a> |
| <span class="sourceLineNo">645</span> public boolean waitMetaLoaded(Procedure<?> proc) {<a name="line.645"></a> |
| <span class="sourceLineNo">646</span> return metaLoadEvent.suspendIfNotReady(proc);<a name="line.646"></a> |
| <span class="sourceLineNo">647</span> }<a name="line.647"></a> |
| <span class="sourceLineNo">648</span><a name="line.648"></a> |
| <span class="sourceLineNo">649</span> /**<a name="line.649"></a> |
| <span class="sourceLineNo">650</span> * Wakes the meta load event. Called during master initialization, after<a name="line.650"></a> |
| <span class="sourceLineNo">651</span> * {@link #processOfflineRegions()}, because processOfflineRegions generates assign procedures<a name="line.651"></a> |
| <span class="sourceLineNo">652</span> * for offline regions, which may conflict with table creation.<a name="line.652"></a> |
| <span class="sourceLineNo">653</span> * <p><a name="line.653"></a> |
| <span class="sourceLineNo">654</span> * This is a bit dirty and should be reconsidered once we decide whether to keep the<a name="line.654"></a> |
| <span class="sourceLineNo">655</span> * {@link #processOfflineRegions()} method.<a name="line.655"></a> |
| <span class="sourceLineNo">656</span> */<a name="line.656"></a> |
| <span class="sourceLineNo">657</span> public void wakeMetaLoadedEvent() {<a name="line.657"></a> |
| <span class="sourceLineNo">658</span> metaLoadEvent.wake(getProcedureScheduler());<a name="line.658"></a> |
| <span class="sourceLineNo">659</span> assert isMetaLoaded() : "expected meta to be loaded";<a name="line.659"></a> |
| <span class="sourceLineNo">660</span> }<a name="line.660"></a> |
| <span class="sourceLineNo">661</span><a name="line.661"></a> |
| <span class="sourceLineNo">662</span> /**<a name="line.662"></a> |
| <span class="sourceLineNo">663</span> * Return whether the AM has finished the meta loading, i.e., the region states rebuilding.<a name="line.663"></a> |
| <span class="sourceLineNo">664</span> * @see #isMetaAssigned()<a name="line.664"></a> |
| <span class="sourceLineNo">665</span> * @see #waitMetaLoaded(Procedure)<a name="line.665"></a> |
| <span class="sourceLineNo">666</span> */<a name="line.666"></a> |
| <span class="sourceLineNo">667</span> public boolean isMetaLoaded() {<a name="line.667"></a> |
| <span class="sourceLineNo">668</span> return metaLoadEvent.isReady();<a name="line.668"></a> |
| <span class="sourceLineNo">669</span> }<a name="line.669"></a> |
| <span class="sourceLineNo">670</span><a name="line.670"></a> |
| <span class="sourceLineNo">671</span> /**<a name="line.671"></a> |
| <span class="sourceLineNo">672</span> * Start a new thread to check if there are region servers whose versions are higher than others.<a name="line.672"></a> |
| <span class="sourceLineNo">673</span> * If so, move all system table regions to RS with the highest version to keep compatibility. The<a name="line.673"></a> |
| <span class="sourceLineNo">674</span> * reason is, RS in new version may not be able to access RS in old version when there are some<a name="line.674"></a> |
| <span class="sourceLineNo">675</span> * incompatible changes.<a name="line.675"></a> |
| <span class="sourceLineNo">676</span> * <p><a name="line.676"></a> |
| <span class="sourceLineNo">677</span> * This method is called when a new RegionServer is added to cluster only.<a name="line.677"></a> |
| <span class="sourceLineNo">678</span> * </p><a name="line.678"></a> |
| <span class="sourceLineNo">679</span> */<a name="line.679"></a> |
| <span class="sourceLineNo">680</span> public void checkIfShouldMoveSystemRegionAsync() {<a name="line.680"></a> |
| <span class="sourceLineNo">681</span> // TODO: Fix this thread. If a server is killed and a new one started, this thread thinks that<a name="line.681"></a> |
| <span class="sourceLineNo">682</span> // it should 'move' the system tables from the old server to the new server but<a name="line.682"></a> |
| <span class="sourceLineNo">683</span> // ServerCrashProcedure is on it; and it will take care of the assign without dataloss.<a name="line.683"></a> |
| <span class="sourceLineNo">684</span> if (this.master.getServerManager().countOfRegionServers() <= 1) {<a name="line.684"></a> |
| <span class="sourceLineNo">685</span> return;<a name="line.685"></a> |
| <span class="sourceLineNo">686</span> }<a name="line.686"></a> |
| <span class="sourceLineNo">687</span> // This thread used to run whenever there was a change in the cluster. The ZooKeeper<a name="line.687"></a> |
| <span class="sourceLineNo">688</span> // childrenChanged notification came in before the nodeDeleted message and so this method<a name="line.688"></a> |
| <span class="sourceLineNo">689</span> // could run before a ServerCrashProcedure could run. That meant that this thread could see<a name="line.689"></a> |
| <span class="sourceLineNo">690</span> // a Crashed Server before ServerCrashProcedure and it could find system regions on the<a name="line.690"></a> |
| <span class="sourceLineNo">691</span> // crashed server and go move them before ServerCrashProcedure had a chance; could be<a name="line.691"></a> |
| <span class="sourceLineNo">692</span> // dataloss too if WALs were not recovered.<a name="line.692"></a> |
| <span class="sourceLineNo">693</span> new Thread(() -> {<a name="line.693"></a> |
| <span class="sourceLineNo">694</span> try {<a name="line.694"></a> |
| <span class="sourceLineNo">695</span> synchronized (checkIfShouldMoveSystemRegionLock) {<a name="line.695"></a> |
| <span class="sourceLineNo">696</span> // TODO: I don't think this code does a good job if all servers in cluster have same<a name="line.696"></a> |
| <span class="sourceLineNo">697</span> // version. It looks like it will schedule unnecessary moves.<a name="line.697"></a> |
| <span class="sourceLineNo">698</span> for (ServerName server : getExcludedServersForSystemTable()) {<a name="line.698"></a> |
| <span class="sourceLineNo">699</span> if (master.getServerManager().isServerDead(server)) {<a name="line.699"></a> |
| <span class="sourceLineNo">700</span> // TODO: See HBASE-18494 and HBASE-18495. Though getExcludedServersForSystemTable()<a name="line.700"></a> |
| <span class="sourceLineNo">701</span> // considers only online servers, the server could be queued for dead server<a name="line.701"></a> |
| <span class="sourceLineNo">702</span> // processing. As region assignments for crashed server is handled by<a name="line.702"></a> |
| <span class="sourceLineNo">703</span> // ServerCrashProcedure, do NOT handle them here. The goal is to handle this through<a name="line.703"></a> |
| <span class="sourceLineNo">704</span> // regular flow of LoadBalancer as a favored node and not to have this special<a name="line.704"></a> |
| <span class="sourceLineNo">705</span> // handling.<a name="line.705"></a> |
| <span class="sourceLineNo">706</span> continue;<a name="line.706"></a> |
| <span class="sourceLineNo">707</span> }<a name="line.707"></a> |
| <span class="sourceLineNo">708</span> // Collect the non-meta plans per server. A list shared across servers would re-submit<a name="line.708"></a> |
| <span class="sourceLineNo">709</span> // earlier servers' plans on each later pass; moveAsync throws for a region in transition.<a name="line.709"></a> |
| <span class="sourceLineNo">710</span> List<RegionPlan> plans = new ArrayList<>();<a name="line.710"></a> |
| <span class="sourceLineNo">711</span> List<RegionInfo> regionsShouldMove = getSystemTables(server);<a name="line.711"></a> |
| <span class="sourceLineNo">712</span> for (RegionInfo regionInfo : regionsShouldMove) {<a name="line.712"></a> |
| <span class="sourceLineNo">713</span> // null value for dest forces destination server to be selected by balancer<a name="line.713"></a> |
| <span class="sourceLineNo">714</span> RegionPlan plan = new RegionPlan(regionInfo, server, null);<a name="line.714"></a> |
| <span class="sourceLineNo">715</span> if (regionInfo.isMetaRegion()) {<a name="line.715"></a> |
| <span class="sourceLineNo">716</span> // Must move meta region first.<a name="line.716"></a> |
| <span class="sourceLineNo">717</span> LOG.info("Async MOVE of {} from Server={}", regionInfo.getEncodedName(),<a name="line.717"></a> |
| <span class="sourceLineNo">718</span> server);<a name="line.718"></a> |
| <span class="sourceLineNo">719</span> moveAsync(plan);<a name="line.719"></a> |
| <span class="sourceLineNo">720</span> } else {<a name="line.720"></a> |
| <span class="sourceLineNo">721</span> plans.add(plan);<a name="line.721"></a> |
| <span class="sourceLineNo">722</span> }<a name="line.722"></a> |
| <span class="sourceLineNo">723</span> }<a name="line.723"></a> |
| <span class="sourceLineNo">724</span> for (RegionPlan plan : plans) {<a name="line.724"></a> |
| <span class="sourceLineNo">725</span> LOG.info("Async MOVE of {} from Server={}", plan.getRegionInfo().getEncodedName(),<a name="line.725"></a> |
| <span class="sourceLineNo">726</span> server);<a name="line.726"></a> |
| <span class="sourceLineNo">727</span> moveAsync(plan);<a name="line.727"></a> |
| <span class="sourceLineNo">728</span> }<a name="line.728"></a> |
| <span class="sourceLineNo">729</span> }<a name="line.729"></a> |
| <span class="sourceLineNo">730</span> }<a name="line.730"></a> |
| <span class="sourceLineNo">731</span> } catch (Throwable t) {<a name="line.731"></a> |
| <span class="sourceLineNo">732</span> LOG.error(t.toString(), t);<a name="line.732"></a> |
| <span class="sourceLineNo">733</span> }<a name="line.733"></a> |
| <span class="sourceLineNo">734</span> }).start();<a name="line.734"></a> |
| <span class="sourceLineNo">735</span> }<a name="line.735"></a> |
| <span class="sourceLineNo">736</span><a name="line.736"></a> |
| <span class="sourceLineNo">737</span> private List<RegionInfo> getSystemTables(ServerName serverName) {<a name="line.737"></a> |
| <span class="sourceLineNo">738</span> ServerStateNode node = regionStates.getServerNode(serverName);<a name="line.738"></a> |
| <span class="sourceLineNo">739</span> if (node != null) {<a name="line.739"></a> |
| <span class="sourceLineNo">740</span> return node.getSystemRegionInfoList();<a name="line.740"></a> |
| <span class="sourceLineNo">741</span> }<a name="line.741"></a> |
| <span class="sourceLineNo">742</span> return Collections.emptyList();<a name="line.742"></a> |
| <span class="sourceLineNo">743</span> }<a name="line.743"></a> |
| <span class="sourceLineNo">744</span><a name="line.744"></a> |
| <span class="sourceLineNo">745</span> private void preTransitCheck(RegionStateNode regionNode, RegionState.State[] expectedStates)<a name="line.745"></a> |
| <span class="sourceLineNo">746</span> throws HBaseIOException {<a name="line.746"></a> |
| <span class="sourceLineNo">747</span> if (regionNode.getProcedure() != null) {<a name="line.747"></a> |
| <span class="sourceLineNo">748</span> throw new HBaseIOException(<a name="line.748"></a> |
| <span class="sourceLineNo">749</span> regionNode + " is currently in transition; pid=" + regionNode.getProcedure().getProcId());<a name="line.749"></a> |
| <span class="sourceLineNo">750</span> }<a name="line.750"></a> |
| <span class="sourceLineNo">751</span> if (!regionNode.isInState(expectedStates)) {<a name="line.751"></a> |
| <span class="sourceLineNo">752</span> throw new DoNotRetryRegionException(UNEXPECTED_STATE_REGION + regionNode);<a name="line.752"></a> |
| <span class="sourceLineNo">753</span> }<a name="line.753"></a> |
| <span class="sourceLineNo">754</span> if (isTableDisabled(regionNode.getTable())) {<a name="line.754"></a> |
| <span class="sourceLineNo">755</span> throw new DoNotRetryIOException(regionNode.getTable() + " is disabled for " + regionNode);<a name="line.755"></a> |
| <span class="sourceLineNo">756</span> }<a name="line.756"></a> |
| <span class="sourceLineNo">757</span> }<a name="line.757"></a> |
| <span class="sourceLineNo">758</span><a name="line.758"></a> |
| <span class="sourceLineNo">759</span> /**<a name="line.759"></a> |
| <span class="sourceLineNo">760</span> * Create an assign TransitRegionStateProcedure. Checks RegionState first and throws if it is not<a name="line.760"></a> |
| <span class="sourceLineNo">761</span> * appropriate, UNLESS override and force are both set. Used by hbck2 and by straightline<a name="line.761"></a> |
| <span class="sourceLineNo">762</span> * {@link #assign(RegionInfo, ServerName)} and {@link #assignAsync(RegionInfo, ServerName)}.<a name="line.762"></a> |
| <span class="sourceLineNo">763</span> * @see #createAssignProcedure(RegionStateNode, ServerName) for a version that does NO checking.<a name="line.763"></a> |
| <span class="sourceLineNo">764</span> * @param override If true, unset any procedure still attached to the region node before assign.<a name="line.764"></a> |
| <span class="sourceLineNo">765</span> * @param force Only read when override is true; if true, skip the RegionState pre-check.<a name="line.765"></a> |
| <span class="sourceLineNo">766</span> */<a name="line.766"></a> |
| <span class="sourceLineNo">767</span> private TransitRegionStateProcedure createAssignProcedure(RegionInfo regionInfo, ServerName sn,<a name="line.767"></a> |
| <span class="sourceLineNo">768</span> boolean override, boolean force) throws IOException {<a name="line.768"></a> |
| <span class="sourceLineNo">769</span> RegionStateNode regionNode = regionStates.getOrCreateRegionStateNode(regionInfo);<a name="line.769"></a> |
| <span class="sourceLineNo">770</span> regionNode.lock();<a name="line.770"></a> |
| <span class="sourceLineNo">771</span> try {<a name="line.771"></a> |
| <span class="sourceLineNo">772</span> if (override) {<a name="line.772"></a> |
| <span class="sourceLineNo">773</span> if (!force) {<a name="line.773"></a> |
| <span class="sourceLineNo">774</span> preTransitCheck(regionNode, STATES_EXPECTED_ON_ASSIGN);<a name="line.774"></a> |
| <span class="sourceLineNo">775</span> }<a name="line.775"></a> |
| <span class="sourceLineNo">776</span> if (regionNode.getProcedure() != null) {<a name="line.776"></a> |
| <span class="sourceLineNo">777</span> regionNode.unsetProcedure(regionNode.getProcedure());<a name="line.777"></a> |
| <span class="sourceLineNo">778</span> }<a name="line.778"></a> |
| <span class="sourceLineNo">779</span> } else {<a name="line.779"></a> |
| <span class="sourceLineNo">780</span> preTransitCheck(regionNode, STATES_EXPECTED_ON_ASSIGN);<a name="line.780"></a> |
| <span class="sourceLineNo">781</span> }<a name="line.781"></a> |
| <span class="sourceLineNo">782</span> assert regionNode.getProcedure() == null;<a name="line.782"></a> |
| <span class="sourceLineNo">783</span> return regionNode.setProcedure(<a name="line.783"></a> |
| <span class="sourceLineNo">784</span> TransitRegionStateProcedure.assign(getProcedureEnvironment(), regionInfo, sn));<a name="line.784"></a> |
| <span class="sourceLineNo">785</span> } finally {<a name="line.785"></a> |
| <span class="sourceLineNo">786</span> regionNode.unlock();<a name="line.786"></a> |
| <span class="sourceLineNo">787</span> }<a name="line.787"></a> |
| <span class="sourceLineNo">788</span> }<a name="line.788"></a> |
| <span class="sourceLineNo">789</span><a name="line.789"></a> |
| <span class="sourceLineNo">790</span> /**<a name="line.790"></a> |
| <span class="sourceLineNo">791</span> * Create an assign TransitRegionStateProcedure. Does NO checking of RegionState. Presumes<a name="line.791"></a> |
| <span class="sourceLineNo">792</span> * appropriate state ripe for assign.<a name="line.792"></a> |
| <span class="sourceLineNo">793</span> * @see #createAssignProcedure(RegionInfo, ServerName, boolean, boolean)<a name="line.793"></a> |
| <span class="sourceLineNo">794</span> */<a name="line.794"></a> |
| <span class="sourceLineNo">795</span> private TransitRegionStateProcedure createAssignProcedure(RegionStateNode regionNode,<a name="line.795"></a> |
| <span class="sourceLineNo">796</span> ServerName targetServer) {<a name="line.796"></a> |
| <span class="sourceLineNo">797</span> regionNode.lock();<a name="line.797"></a> |
| <span class="sourceLineNo">798</span> try {<a name="line.798"></a> |
| <span class="sourceLineNo">799</span> return regionNode.setProcedure(TransitRegionStateProcedure.assign(getProcedureEnvironment(),<a name="line.799"></a> |
| <span class="sourceLineNo">800</span> regionNode.getRegionInfo(), targetServer));<a name="line.800"></a> |
| <span class="sourceLineNo">801</span> } finally {<a name="line.801"></a> |
| <span class="sourceLineNo">802</span> regionNode.unlock();<a name="line.802"></a> |
| <span class="sourceLineNo">803</span> }<a name="line.803"></a> |
| <span class="sourceLineNo">804</span> }<a name="line.804"></a> |
| <span class="sourceLineNo">805</span><a name="line.805"></a> |
| <span class="sourceLineNo">806</span> public long assign(RegionInfo regionInfo, ServerName sn) throws IOException {<a name="line.806"></a> |
| <span class="sourceLineNo">807</span> TransitRegionStateProcedure proc = createAssignProcedure(regionInfo, sn, false, false);<a name="line.807"></a> |
| <span class="sourceLineNo">808</span> ProcedureSyncWait.submitAndWaitProcedure(master.getMasterProcedureExecutor(), proc);<a name="line.808"></a> |
| <span class="sourceLineNo">809</span> return proc.getProcId();<a name="line.809"></a> |
| <span class="sourceLineNo">810</span> }<a name="line.810"></a> |
| <span class="sourceLineNo">811</span><a name="line.811"></a> |
| <span class="sourceLineNo">812</span> public long assign(RegionInfo regionInfo) throws IOException {<a name="line.812"></a> |
| <span class="sourceLineNo">813</span> return assign(regionInfo, null);<a name="line.813"></a> |
| <span class="sourceLineNo">814</span> }<a name="line.814"></a> |
| <span class="sourceLineNo">815</span><a name="line.815"></a> |
| <span class="sourceLineNo">816</span> /**<a name="line.816"></a> |
| <span class="sourceLineNo">817</span> * Submits a procedure that assigns a region to a target server, without waiting for it to finish.<a name="line.817"></a> |
| <span class="sourceLineNo">818</span> * @param regionInfo the region we would like to assign<a name="line.818"></a> |
| <span class="sourceLineNo">819</span> * @param sn target server name; {@link #assignAsync(RegionInfo)} passes null here<a name="line.819"></a> |
| <span class="sourceLineNo">820</span> */<a name="line.820"></a> |
| <span class="sourceLineNo">821</span> public Future<byte[]> assignAsync(RegionInfo regionInfo, ServerName sn) throws IOException {<a name="line.821"></a> |
| <span class="sourceLineNo">822</span> return ProcedureSyncWait.submitProcedure(master.getMasterProcedureExecutor(),<a name="line.822"></a> |
| <span class="sourceLineNo">823</span> createAssignProcedure(regionInfo, sn, false, false));<a name="line.823"></a> |
| <span class="sourceLineNo">824</span> }<a name="line.824"></a> |
| <span class="sourceLineNo">825</span><a name="line.825"></a> |
| <span class="sourceLineNo">826</span> /**<a name="line.826"></a> |
| <span class="sourceLineNo">827</span> * Submits a procedure that assigns a region without waiting for it to finish<a name="line.827"></a> |
| <span class="sourceLineNo">828</span> * @param regionInfo the region we would like to assign<a name="line.828"></a> |
| <span class="sourceLineNo">829</span> */<a name="line.829"></a> |
| <span class="sourceLineNo">830</span> public Future<byte[]> assignAsync(RegionInfo regionInfo) throws IOException {<a name="line.830"></a> |
| <span class="sourceLineNo">831</span> return assignAsync(regionInfo, null);<a name="line.831"></a> |
| <span class="sourceLineNo">832</span> }<a name="line.832"></a> |
| <span class="sourceLineNo">833</span><a name="line.833"></a> |
| <span class="sourceLineNo">834</span> public long unassign(RegionInfo regionInfo) throws IOException {<a name="line.834"></a> |
| <span class="sourceLineNo">835</span> RegionStateNode regionNode = regionStates.getRegionStateNode(regionInfo);<a name="line.835"></a> |
| <span class="sourceLineNo">836</span> if (regionNode == null) {<a name="line.836"></a> |
| <span class="sourceLineNo">837</span> throw new UnknownRegionException("No RegionState found for " + regionInfo.getEncodedName());<a name="line.837"></a> |
| <span class="sourceLineNo">838</span> }<a name="line.838"></a> |
| <span class="sourceLineNo">839</span> TransitRegionStateProcedure proc;<a name="line.839"></a> |
| <span class="sourceLineNo">840</span> regionNode.lock();<a name="line.840"></a> |
| <span class="sourceLineNo">841</span> try {<a name="line.841"></a> |
| <span class="sourceLineNo">842</span> preTransitCheck(regionNode, STATES_EXPECTED_ON_UNASSIGN_OR_MOVE);<a name="line.842"></a> |
| <span class="sourceLineNo">843</span> proc = TransitRegionStateProcedure.unassign(getProcedureEnvironment(), regionInfo);<a name="line.843"></a> |
| <span class="sourceLineNo">844</span> regionNode.setProcedure(proc);<a name="line.844"></a> |
| <span class="sourceLineNo">845</span> } finally {<a name="line.845"></a> |
| <span class="sourceLineNo">846</span> regionNode.unlock();<a name="line.846"></a> |
| <span class="sourceLineNo">847</span> }<a name="line.847"></a> |
| <span class="sourceLineNo">848</span> ProcedureSyncWait.submitAndWaitProcedure(master.getMasterProcedureExecutor(), proc);<a name="line.848"></a> |
| <span class="sourceLineNo">849</span> return proc.getProcId();<a name="line.849"></a> |
| <span class="sourceLineNo">850</span> }<a name="line.850"></a> |
| <span class="sourceLineNo">851</span><a name="line.851"></a> |
| <span class="sourceLineNo">852</span> public TransitRegionStateProcedure createMoveRegionProcedure(RegionInfo regionInfo,<a name="line.852"></a> |
| <span class="sourceLineNo">853</span> ServerName targetServer) throws HBaseIOException {<a name="line.853"></a> |
| <span class="sourceLineNo">854</span> RegionStateNode regionNode = this.regionStates.getRegionStateNode(regionInfo);<a name="line.854"></a> |
| <span class="sourceLineNo">855</span> if (regionNode == null) {<a name="line.855"></a> |
| <span class="sourceLineNo">856</span> throw new UnknownRegionException(<a name="line.856"></a> |
| <span class="sourceLineNo">857</span> "No RegionStateNode found for " + regionInfo.getEncodedName() + "(Closed/Deleted?)");<a name="line.857"></a> |
| <span class="sourceLineNo">858</span> }<a name="line.858"></a> |
| <span class="sourceLineNo">859</span> TransitRegionStateProcedure proc;<a name="line.859"></a> |
| <span class="sourceLineNo">860</span> regionNode.lock();<a name="line.860"></a> |
| <span class="sourceLineNo">861</span> try {<a name="line.861"></a> |
| <span class="sourceLineNo">862</span> preTransitCheck(regionNode, STATES_EXPECTED_ON_UNASSIGN_OR_MOVE);<a name="line.862"></a> |
| <span class="sourceLineNo">863</span> regionNode.checkOnline();<a name="line.863"></a> |
| <span class="sourceLineNo">864</span> proc = TransitRegionStateProcedure.move(getProcedureEnvironment(), regionInfo, targetServer);<a name="line.864"></a> |
| <span class="sourceLineNo">865</span> regionNode.setProcedure(proc);<a name="line.865"></a> |
| <span class="sourceLineNo">866</span> } finally {<a name="line.866"></a> |
| <span class="sourceLineNo">867</span> regionNode.unlock();<a name="line.867"></a> |
| <span class="sourceLineNo">868</span> }<a name="line.868"></a> |
| <span class="sourceLineNo">869</span> return proc;<a name="line.869"></a> |
| <span class="sourceLineNo">870</span> }<a name="line.870"></a> |
| <span class="sourceLineNo">871</span><a name="line.871"></a> |
| <span class="sourceLineNo">872</span> public void move(RegionInfo regionInfo) throws IOException {<a name="line.872"></a> |
| <span class="sourceLineNo">873</span> TransitRegionStateProcedure proc = createMoveRegionProcedure(regionInfo, null);<a name="line.873"></a> |
| <span class="sourceLineNo">874</span> ProcedureSyncWait.submitAndWaitProcedure(master.getMasterProcedureExecutor(), proc);<a name="line.874"></a> |
| <span class="sourceLineNo">875</span> }<a name="line.875"></a> |
| <span class="sourceLineNo">876</span><a name="line.876"></a> |
| <span class="sourceLineNo">877</span> public Future<byte[]> moveAsync(RegionPlan regionPlan) throws HBaseIOException {<a name="line.877"></a> |
| <span class="sourceLineNo">878</span> TransitRegionStateProcedure proc =<a name="line.878"></a> |
| <span class="sourceLineNo">879</span> createMoveRegionProcedure(regionPlan.getRegionInfo(), regionPlan.getDestination());<a name="line.879"></a> |
| <span class="sourceLineNo">880</span> return ProcedureSyncWait.submitProcedure(master.getMasterProcedureExecutor(), proc);<a name="line.880"></a> |
| <span class="sourceLineNo">881</span> }<a name="line.881"></a> |
| <span class="sourceLineNo">882</span><a name="line.882"></a> |
| <span class="sourceLineNo">883</span> public Future<byte[]> balance(RegionPlan regionPlan) throws HBaseIOException {<a name="line.883"></a> |
| <span class="sourceLineNo">884</span> ServerName current =<a name="line.884"></a> |
| <span class="sourceLineNo">885</span> this.getRegionStates().getRegionAssignments().get(regionPlan.getRegionInfo());<a name="line.885"></a> |
| <span class="sourceLineNo">886</span> if (current == null || !current.equals(regionPlan.getSource())) {<a name="line.886"></a> |
| <span class="sourceLineNo">887</span> LOG.debug("Skip region plan {}, source server not match, current region location is {}",<a name="line.887"></a> |
| <span class="sourceLineNo">888</span> regionPlan, current == null ? "(null)" : current);<a name="line.888"></a> |
| <span class="sourceLineNo">889</span> return null;<a name="line.889"></a> |
| <span class="sourceLineNo">890</span> }<a name="line.890"></a> |
| <span class="sourceLineNo">891</span> return moveAsync(regionPlan);<a name="line.891"></a> |
| <span class="sourceLineNo">892</span> }<a name="line.892"></a> |
| <span class="sourceLineNo">893</span><a name="line.893"></a> |
| <span class="sourceLineNo">894</span> // ============================================================================================<a name="line.894"></a> |
| <span class="sourceLineNo">895</span> // RegionTransition procedures helpers<a name="line.895"></a> |
| <span class="sourceLineNo">896</span> // ============================================================================================<a name="line.896"></a> |
| <span class="sourceLineNo">897</span><a name="line.897"></a> |
| <span class="sourceLineNo">898</span> /**<a name="line.898"></a> |
| <span class="sourceLineNo">899</span> * Create round-robin assigns. Use on table creation to distribute out regions across cluster.<a name="line.899"></a> |
| <span class="sourceLineNo">900</span> * @return AssignProcedures made out of the passed in <code>hris</code> and a call to the balancer<a name="line.900"></a> |
| <span class="sourceLineNo">901</span> * to populate the assigns with targets chosen using round-robin (default balancer<a name="line.901"></a> |
| <span class="sourceLineNo">902</span> * scheme). If at assign-time, the target chosen is no longer up, that's fine, the<a name="line.902"></a> |
| <span class="sourceLineNo">903</span> * AssignProcedure will ask the balancer for a new target, and so on.<a name="line.903"></a> |
| <span class="sourceLineNo">904</span> */<a name="line.904"></a> |
| <span class="sourceLineNo">905</span> public TransitRegionStateProcedure[] createRoundRobinAssignProcedures(List<RegionInfo> hris,<a name="line.905"></a> |
| <span class="sourceLineNo">906</span> List<ServerName> serversToExclude) {<a name="line.906"></a> |
| <span class="sourceLineNo">907</span> if (hris.isEmpty()) {<a name="line.907"></a> |
| <span class="sourceLineNo">908</span> return new TransitRegionStateProcedure[0];<a name="line.908"></a> |
| <span class="sourceLineNo">909</span> }<a name="line.909"></a> |
| <span class="sourceLineNo">910</span><a name="line.910"></a> |
| <span class="sourceLineNo">911</span> if (<a name="line.911"></a> |
| <span class="sourceLineNo">912</span> serversToExclude != null && this.master.getServerManager().getOnlineServersList().size() == 1<a name="line.912"></a> |
| <span class="sourceLineNo">913</span> ) {<a name="line.913"></a> |
| <span class="sourceLineNo">914</span> LOG.debug("Only one region server found and hence going ahead with the assignment");<a name="line.914"></a> |
| <span class="sourceLineNo">915</span> serversToExclude = null;<a name="line.915"></a> |
| <span class="sourceLineNo">916</span> }<a name="line.916"></a> |
| <span class="sourceLineNo">917</span> try {<a name="line.917"></a> |
| <span class="sourceLineNo">918</span> // Ask the balancer to assign our regions. Pass the regions en masse. The balancer can do<a name="line.918"></a> |
| <span class="sourceLineNo">919</span> // a better job if it has all the assignments in the one lump.<a name="line.919"></a> |
| <span class="sourceLineNo">920</span> Map<ServerName, List<RegionInfo>> assignments = getBalancer().roundRobinAssignment(hris,<a name="line.920"></a> |
| <span class="sourceLineNo">921</span> this.master.getServerManager().createDestinationServersList(serversToExclude));<a name="line.921"></a> |
| <span class="sourceLineNo">922</span> // Return mid-method!<a name="line.922"></a> |
| <span class="sourceLineNo">923</span> return createAssignProcedures(assignments);<a name="line.923"></a> |
| <span class="sourceLineNo">924</span> } catch (IOException hioe) {<a name="line.924"></a> |
| <span class="sourceLineNo">925</span> LOG.warn("Failed roundRobinAssignment", hioe);<a name="line.925"></a> |
| <span class="sourceLineNo">926</span> }<a name="line.926"></a> |
| <span class="sourceLineNo">927</span> // On an IOException above, fall through to this simpler assign of hris. Last resort.<a name="line.927"></a> |
| <span class="sourceLineNo">928</span> return createAssignProcedures(hris);<a name="line.928"></a> |
| <span class="sourceLineNo">929</span> }<a name="line.929"></a> |
| <span class="sourceLineNo">930</span><a name="line.930"></a> |
| <span class="sourceLineNo">931</span> /**<a name="line.931"></a> |
| <span class="sourceLineNo">932</span> * Create round-robin assigns. Use on table creation to distribute out regions across cluster.<a name="line.932"></a> |
| <span class="sourceLineNo">933</span> * @return AssignProcedures made out of the passed in <code>hris</code> and a call to the balancer<a name="line.933"></a> |
| <span class="sourceLineNo">934</span> * to populate the assigns with targets chosen using round-robin (default balancer<a name="line.934"></a> |
| <span class="sourceLineNo">935</span> * scheme). If at assign-time, the target chosen is no longer up, thats fine, the<a name="line.935"></a> |
| <span class="sourceLineNo">936</span> * AssignProcedure will ask the balancer for a new target, and so on.<a name="line.936"></a> |
| <span class="sourceLineNo">937</span> */<a name="line.937"></a> |
| <span class="sourceLineNo">938</span> public TransitRegionStateProcedure[] createRoundRobinAssignProcedures(List<RegionInfo> hris) {<a name="line.938"></a> |
| <span class="sourceLineNo">939</span> return createRoundRobinAssignProcedures(hris, null);<a name="line.939"></a> |
| <span class="sourceLineNo">940</span> }<a name="line.940"></a> |
| <span class="sourceLineNo">941</span><a name="line.941"></a> |
| <span class="sourceLineNo">942</span> static int compare(TransitRegionStateProcedure left, TransitRegionStateProcedure right) {<a name="line.942"></a> |
| <span class="sourceLineNo">943</span> if (left.getRegion().isMetaRegion()) {<a name="line.943"></a> |
| <span class="sourceLineNo">944</span> if (right.getRegion().isMetaRegion()) {<a name="line.944"></a> |
| <span class="sourceLineNo">945</span> return RegionInfo.COMPARATOR.compare(left.getRegion(), right.getRegion());<a name="line.945"></a> |
| <span class="sourceLineNo">946</span> }<a name="line.946"></a> |
| <span class="sourceLineNo">947</span> return -1;<a name="line.947"></a> |
| <span class="sourceLineNo">948</span> } else if (right.getRegion().isMetaRegion()) {<a name="line.948"></a> |
| <span class="sourceLineNo">949</span> return +1;<a name="line.949"></a> |
| <span class="sourceLineNo">950</span> }<a name="line.950"></a> |
| <span class="sourceLineNo">951</span> if (left.getRegion().getTable().isSystemTable()) {<a name="line.951"></a> |
| <span class="sourceLineNo">952</span> if (right.getRegion().getTable().isSystemTable()) {<a name="line.952"></a> |
| <span class="sourceLineNo">953</span> return RegionInfo.COMPARATOR.compare(left.getRegion(), right.getRegion());<a name="line.953"></a> |
| <span class="sourceLineNo">954</span> }<a name="line.954"></a> |
| <span class="sourceLineNo">955</span> return -1;<a name="line.955"></a> |
| <span class="sourceLineNo">956</span> } else if (right.getRegion().getTable().isSystemTable()) {<a name="line.956"></a> |
| <span class="sourceLineNo">957</span> return +1;<a name="line.957"></a> |
| <span class="sourceLineNo">958</span> }<a name="line.958"></a> |
| <span class="sourceLineNo">959</span> return RegionInfo.COMPARATOR.compare(left.getRegion(), right.getRegion());<a name="line.959"></a> |
| <span class="sourceLineNo">960</span> }<a name="line.960"></a> |
| <span class="sourceLineNo">961</span><a name="line.961"></a> |
| <span class="sourceLineNo">962</span> /**<a name="line.962"></a> |
| <span class="sourceLineNo">963</span> * Create one TransitRegionStateProcedure to assign a region w/o specifying a target server. This<a name="line.963"></a> |
| <span class="sourceLineNo">964</span> * method is called from HBCK2.<a name="line.964"></a> |
| <span class="sourceLineNo">965</span> * @return an assign or null<a name="line.965"></a> |
| <span class="sourceLineNo">966</span> */<a name="line.966"></a> |
| <span class="sourceLineNo">967</span> public TransitRegionStateProcedure createOneAssignProcedure(RegionInfo ri, boolean override,<a name="line.967"></a> |
| <span class="sourceLineNo">968</span> boolean force) {<a name="line.968"></a> |
| <span class="sourceLineNo">969</span> TransitRegionStateProcedure trsp = null;<a name="line.969"></a> |
| <span class="sourceLineNo">970</span> try {<a name="line.970"></a> |
| <span class="sourceLineNo">971</span> trsp = createAssignProcedure(ri, null, override, force);<a name="line.971"></a> |
| <span class="sourceLineNo">972</span> } catch (IOException ioe) {<a name="line.972"></a> |
| <span class="sourceLineNo">973</span> LOG.info(<a name="line.973"></a> |
| <span class="sourceLineNo">974</span> "Failed {} assign, override={}"<a name="line.974"></a> |
| <span class="sourceLineNo">975</span> + (override ? "" : "; set override to by-pass state checks."),<a name="line.975"></a> |
| <span class="sourceLineNo">976</span> ri.getEncodedName(), override, ioe);<a name="line.976"></a> |
| <span class="sourceLineNo">977</span> }<a name="line.977"></a> |
| <span class="sourceLineNo">978</span> return trsp;<a name="line.978"></a> |
| <span class="sourceLineNo">979</span> }<a name="line.979"></a> |
| <span class="sourceLineNo">980</span><a name="line.980"></a> |
| <span class="sourceLineNo">981</span> /**<a name="line.981"></a> |
| <span class="sourceLineNo">982</span> * Create one TransitRegionStateProcedure to unassign a region. This method is called from HBCK2.<a name="line.982"></a> |
| <span class="sourceLineNo">983</span> * @return an unassign or null<a name="line.983"></a> |
| <span class="sourceLineNo">984</span> */<a name="line.984"></a> |
| <span class="sourceLineNo">985</span> public TransitRegionStateProcedure createOneUnassignProcedure(RegionInfo ri, boolean override,<a name="line.985"></a> |
| <span class="sourceLineNo">986</span> boolean force) {<a name="line.986"></a> |
| <span class="sourceLineNo">987</span> RegionStateNode regionNode = regionStates.getOrCreateRegionStateNode(ri);<a name="line.987"></a> |
| <span class="sourceLineNo">988</span> TransitRegionStateProcedure trsp = null;<a name="line.988"></a> |
| <span class="sourceLineNo">989</span> regionNode.lock();<a name="line.989"></a> |
| <span class="sourceLineNo">990</span> try {<a name="line.990"></a> |
| <span class="sourceLineNo">991</span> if (override) {<a name="line.991"></a> |
| <span class="sourceLineNo">992</span> if (!force) {<a name="line.992"></a> |
| <span class="sourceLineNo">993</span> preTransitCheck(regionNode, STATES_EXPECTED_ON_UNASSIGN_OR_MOVE);<a name="line.993"></a> |
| <span class="sourceLineNo">994</span> }<a name="line.994"></a> |
| <span class="sourceLineNo">995</span> if (regionNode.getProcedure() != null) {<a name="line.995"></a> |
| <span class="sourceLineNo">996</span> regionNode.unsetProcedure(regionNode.getProcedure());<a name="line.996"></a> |
| <span class="sourceLineNo">997</span> }<a name="line.997"></a> |
| <span class="sourceLineNo">998</span> } else {<a name="line.998"></a> |
| <span class="sourceLineNo">999</span> // This is where we could throw an exception; i.e. override is false.<a name="line.999"></a> |
| <span class="sourceLineNo">1000</span> preTransitCheck(regionNode, STATES_EXPECTED_ON_UNASSIGN_OR_MOVE);<a name="line.1000"></a> |
| <span class="sourceLineNo">1001</span> }<a name="line.1001"></a> |
| <span class="sourceLineNo">1002</span> assert regionNode.getProcedure() == null;<a name="line.1002"></a> |
| <span class="sourceLineNo">1003</span> trsp =<a name="line.1003"></a> |
| <span class="sourceLineNo">1004</span> TransitRegionStateProcedure.unassign(getProcedureEnvironment(), regionNode.getRegionInfo());<a name="line.1004"></a> |
| <span class="sourceLineNo">1005</span> regionNode.setProcedure(trsp);<a name="line.1005"></a> |
| <span class="sourceLineNo">1006</span> } catch (IOException ioe) {<a name="line.1006"></a> |
| <span class="sourceLineNo">1007</span> // 'override' must be false here.<a name="line.1007"></a> |
| <span class="sourceLineNo">1008</span> LOG.info("Failed {} unassign, override=false; set override to by-pass state checks.",<a name="line.1008"></a> |
| <span class="sourceLineNo">1009</span> ri.getEncodedName(), ioe);<a name="line.1009"></a> |
| <span class="sourceLineNo">1010</span> } finally {<a name="line.1010"></a> |
| <span class="sourceLineNo">1011</span> regionNode.unlock();<a name="line.1011"></a> |
| <span class="sourceLineNo">1012</span> }<a name="line.1012"></a> |
| <span class="sourceLineNo">1013</span> return trsp;<a name="line.1013"></a> |
| <span class="sourceLineNo">1014</span> }<a name="line.1014"></a> |
| <span class="sourceLineNo">1015</span><a name="line.1015"></a> |
| <span class="sourceLineNo">1016</span> /**<a name="line.1016"></a> |
| <span class="sourceLineNo">1017</span> * Create an array of TransitRegionStateProcedure w/o specifying a target server. Used as fallback<a name="line.1017"></a> |
| <span class="sourceLineNo">1018</span> * of caller is unable to do {@link #createAssignProcedures(Map)}.<a name="line.1018"></a> |
| <span class="sourceLineNo">1019</span> * <p/><a name="line.1019"></a> |
| <span class="sourceLineNo">1020</span> * If no target server, at assign time, we will try to use the former location of the region if<a name="line.1020"></a> |
| <span class="sourceLineNo">1021</span> * one exists. This is how we 'retain' the old location across a server restart.<a name="line.1021"></a> |
| <span class="sourceLineNo">1022</span> * <p/><a name="line.1022"></a> |
| <span class="sourceLineNo">1023</span> * Should only be called when you can make sure that no one can touch these regions other than<a name="line.1023"></a> |
| <span class="sourceLineNo">1024</span> * you. For example, when you are creating or enabling table. Presumes all Regions are in<a name="line.1024"></a> |
| <span class="sourceLineNo">1025</span> * appropriate state ripe for assign; no checking of Region state is done in here.<a name="line.1025"></a> |
| <span class="sourceLineNo">1026</span> * @see #createAssignProcedures(Map)<a name="line.1026"></a> |
| <span class="sourceLineNo">1027</span> */<a name="line.1027"></a> |
| <span class="sourceLineNo">1028</span> public TransitRegionStateProcedure[] createAssignProcedures(List<RegionInfo> hris) {<a name="line.1028"></a> |
| <span class="sourceLineNo">1029</span> return hris.stream().map(hri -> regionStates.getOrCreateRegionStateNode(hri))<a name="line.1029"></a> |
| <span class="sourceLineNo">1030</span> .map(regionNode -> createAssignProcedure(regionNode, null)).sorted(AssignmentManager::compare)<a name="line.1030"></a> |
| <span class="sourceLineNo">1031</span> .toArray(TransitRegionStateProcedure[]::new);<a name="line.1031"></a> |
| <span class="sourceLineNo">1032</span> }<a name="line.1032"></a> |
| <span class="sourceLineNo">1033</span><a name="line.1033"></a> |
| <span class="sourceLineNo">1034</span> /**<a name="line.1034"></a> |
| <span class="sourceLineNo">1035</span> * Tied to {@link #createAssignProcedures(List)} in that it is called if caller is unable to run<a name="line.1035"></a> |
| <span class="sourceLineNo">1036</span> * this method. Presumes all Regions are in appropriate state ripe for assign; no checking of<a name="line.1036"></a> |
| <span class="sourceLineNo">1037</span> * Region state is done in here.<a name="line.1037"></a> |
| <span class="sourceLineNo">1038</span> * @param assignments Map of assignments from which we produce an array of AssignProcedures.<a name="line.1038"></a> |
| <span class="sourceLineNo">1039</span> * @return Assignments made from the passed in <code>assignments</code><a name="line.1039"></a> |
| <span class="sourceLineNo">1040</span> * @see #createAssignProcedures(List)<a name="line.1040"></a> |
| <span class="sourceLineNo">1041</span> */<a name="line.1041"></a> |
| <span class="sourceLineNo">1042</span> private TransitRegionStateProcedure[]<a name="line.1042"></a> |
| <span class="sourceLineNo">1043</span> createAssignProcedures(Map<ServerName, List<RegionInfo>> assignments) {<a name="line.1043"></a> |
| <span class="sourceLineNo">1044</span> return assignments.entrySet().stream()<a name="line.1044"></a> |
| <span class="sourceLineNo">1045</span> .flatMap(e -> e.getValue().stream().map(hri -> regionStates.getOrCreateRegionStateNode(hri))<a name="line.1045"></a> |
| <span class="sourceLineNo">1046</span> .map(regionNode -> createAssignProcedure(regionNode, e.getKey())))<a name="line.1046"></a> |
| <span class="sourceLineNo">1047</span> .sorted(AssignmentManager::compare).toArray(TransitRegionStateProcedure[]::new);<a name="line.1047"></a> |
| <span class="sourceLineNo">1048</span> }<a name="line.1048"></a> |
| <span class="sourceLineNo">1049</span><a name="line.1049"></a> |
| <span class="sourceLineNo">1050</span> // for creating unassign TRSP when disabling a table or closing excess region replicas<a name="line.1050"></a> |
| <span class="sourceLineNo">1051</span> private TransitRegionStateProcedure forceCreateUnssignProcedure(RegionStateNode regionNode) {<a name="line.1051"></a> |
| <span class="sourceLineNo">1052</span> regionNode.lock();<a name="line.1052"></a> |
| <span class="sourceLineNo">1053</span> try {<a name="line.1053"></a> |
| <span class="sourceLineNo">1054</span> if (regionNode.isInState(State.OFFLINE, State.CLOSED, State.SPLIT)) {<a name="line.1054"></a> |
| <span class="sourceLineNo">1055</span> return null;<a name="line.1055"></a> |
| <span class="sourceLineNo">1056</span> }<a name="line.1056"></a> |
| <span class="sourceLineNo">1057</span> // in general, a split parent should be in CLOSED or SPLIT state, but anyway, let's check it<a name="line.1057"></a> |
| <span class="sourceLineNo">1058</span> // here for safety<a name="line.1058"></a> |
| <span class="sourceLineNo">1059</span> if (regionNode.getRegionInfo().isSplit()) {<a name="line.1059"></a> |
| <span class="sourceLineNo">1060</span> LOG.warn("{} is a split parent but not in CLOSED or SPLIT state", regionNode);<a name="line.1060"></a> |
| <span class="sourceLineNo">1061</span> return null;<a name="line.1061"></a> |
| <span class="sourceLineNo">1062</span> }<a name="line.1062"></a> |
| <span class="sourceLineNo">1063</span> // As in DisableTableProcedure or ModifyTableProcedure, we will hold the xlock for table, so<a name="line.1063"></a> |
| <span class="sourceLineNo">1064</span> // we can make sure that this procedure has not been executed yet, as TRSP will hold the<a name="line.1064"></a> |
| <span class="sourceLineNo">1065</span> // shared lock for table all the time. So here we will unset it and when it is actually<a name="line.1065"></a> |
| <span class="sourceLineNo">1066</span> // executed, it will find that the attach procedure is not itself and quit immediately.<a name="line.1066"></a> |
| <span class="sourceLineNo">1067</span> if (regionNode.getProcedure() != null) {<a name="line.1067"></a> |
| <span class="sourceLineNo">1068</span> regionNode.unsetProcedure(regionNode.getProcedure());<a name="line.1068"></a> |
| <span class="sourceLineNo">1069</span> }<a name="line.1069"></a> |
| <span class="sourceLineNo">1070</span> return regionNode.setProcedure(TransitRegionStateProcedure.unassign(getProcedureEnvironment(),<a name="line.1070"></a> |
| <span class="sourceLineNo">1071</span> regionNode.getRegionInfo()));<a name="line.1071"></a> |
| <span class="sourceLineNo">1072</span> } finally {<a name="line.1072"></a> |
| <span class="sourceLineNo">1073</span> regionNode.unlock();<a name="line.1073"></a> |
| <span class="sourceLineNo">1074</span> }<a name="line.1074"></a> |
| <span class="sourceLineNo">1075</span> }<a name="line.1075"></a> |
| <span class="sourceLineNo">1076</span><a name="line.1076"></a> |
| <span class="sourceLineNo">1077</span> /**<a name="line.1077"></a> |
| <span class="sourceLineNo">1078</span> * Called by DisableTableProcedure to unassign all the regions for a table.<a name="line.1078"></a> |
| <span class="sourceLineNo">1079</span> */<a name="line.1079"></a> |
| <span class="sourceLineNo">1080</span> public TransitRegionStateProcedure[] createUnassignProceduresForDisabling(TableName tableName) {<a name="line.1080"></a> |
| <span class="sourceLineNo">1081</span> return regionStates.getTableRegionStateNodes(tableName).stream()<a name="line.1081"></a> |
| <span class="sourceLineNo">1082</span> .map(this::forceCreateUnssignProcedure).filter(p -> p != null)<a name="line.1082"></a> |
| <span class="sourceLineNo">1083</span> .toArray(TransitRegionStateProcedure[]::new);<a name="line.1083"></a> |
| <span class="sourceLineNo">1084</span> }<a name="line.1084"></a> |
| <span class="sourceLineNo">1085</span><a name="line.1085"></a> |
| <span class="sourceLineNo">1086</span> /**<a name="line.1086"></a> |
| <span class="sourceLineNo">1087</span> * Called by ModifyTableProcedures to unassign all the excess region replicas for a table.<a name="line.1087"></a> |
| <span class="sourceLineNo">1088</span> */<a name="line.1088"></a> |
| <span class="sourceLineNo">1089</span> public TransitRegionStateProcedure[] createUnassignProceduresForClosingExcessRegionReplicas(<a name="line.1089"></a> |
| <span class="sourceLineNo">1090</span> TableName tableName, int newReplicaCount) {<a name="line.1090"></a> |
| <span class="sourceLineNo">1091</span> return regionStates.getTableRegionStateNodes(tableName).stream()<a name="line.1091"></a> |
| <span class="sourceLineNo">1092</span> .filter(regionNode -> regionNode.getRegionInfo().getReplicaId() >= newReplicaCount)<a name="line.1092"></a> |
| <span class="sourceLineNo">1093</span> .map(this::forceCreateUnssignProcedure).filter(p -> p != null)<a name="line.1093"></a> |
| <span class="sourceLineNo">1094</span> .toArray(TransitRegionStateProcedure[]::new);<a name="line.1094"></a> |
| <span class="sourceLineNo">1095</span> }<a name="line.1095"></a> |
| <span class="sourceLineNo">1096</span><a name="line.1096"></a> |
| <span class="sourceLineNo">1097</span> public SplitTableRegionProcedure createSplitProcedure(final RegionInfo regionToSplit,<a name="line.1097"></a> |
| <span class="sourceLineNo">1098</span> final byte[] splitKey) throws IOException {<a name="line.1098"></a> |
| <span class="sourceLineNo">1099</span> return new SplitTableRegionProcedure(getProcedureEnvironment(), regionToSplit, splitKey);<a name="line.1099"></a> |
| <span class="sourceLineNo">1100</span> }<a name="line.1100"></a> |
| <span class="sourceLineNo">1101</span><a name="line.1101"></a> |
| <span class="sourceLineNo">1102</span> public TruncateRegionProcedure createTruncateRegionProcedure(final RegionInfo regionToTruncate)<a name="line.1102"></a> |
| <span class="sourceLineNo">1103</span> throws IOException {<a name="line.1103"></a> |
| <span class="sourceLineNo">1104</span> return new TruncateRegionProcedure(getProcedureEnvironment(), regionToTruncate);<a name="line.1104"></a> |
| <span class="sourceLineNo">1105</span> }<a name="line.1105"></a> |
| <span class="sourceLineNo">1106</span><a name="line.1106"></a> |
| <span class="sourceLineNo">1107</span> public MergeTableRegionsProcedure createMergeProcedure(RegionInfo... ris) throws IOException {<a name="line.1107"></a> |
| <span class="sourceLineNo">1108</span> return new MergeTableRegionsProcedure(getProcedureEnvironment(), ris, false);<a name="line.1108"></a> |
| <span class="sourceLineNo">1109</span> }<a name="line.1109"></a> |
| <span class="sourceLineNo">1110</span><a name="line.1110"></a> |
| <span class="sourceLineNo">1111</span> /**<a name="line.1111"></a> |
| <span class="sourceLineNo">1112</span> * Delete the region states. This is called by "DeleteTable"<a name="line.1112"></a> |
| <span class="sourceLineNo">1113</span> */<a name="line.1113"></a> |
| <span class="sourceLineNo">1114</span> public void deleteTable(final TableName tableName) throws IOException {<a name="line.1114"></a> |
| <span class="sourceLineNo">1115</span> final ArrayList<RegionInfo> regions = regionStates.getTableRegionsInfo(tableName);<a name="line.1115"></a> |
| <span class="sourceLineNo">1116</span> regionStateStore.deleteRegions(regions);<a name="line.1116"></a> |
| <span class="sourceLineNo">1117</span> for (int i = 0; i < regions.size(); ++i) {<a name="line.1117"></a> |
| <span class="sourceLineNo">1118</span> final RegionInfo regionInfo = regions.get(i);<a name="line.1118"></a> |
| <span class="sourceLineNo">1119</span> regionStates.deleteRegion(regionInfo);<a name="line.1119"></a> |
| <span class="sourceLineNo">1120</span> }<a name="line.1120"></a> |
| <span class="sourceLineNo">1121</span> }<a name="line.1121"></a> |
| <span class="sourceLineNo">1122</span><a name="line.1122"></a> |
| <span class="sourceLineNo">1123</span> // ============================================================================================<a name="line.1123"></a> |
| <span class="sourceLineNo">1124</span> // RS Region Transition Report helpers<a name="line.1124"></a> |
| <span class="sourceLineNo">1125</span> // ============================================================================================<a name="line.1125"></a> |
| <span class="sourceLineNo">1126</span> private void reportRegionStateTransition(ReportRegionStateTransitionResponse.Builder builder,<a name="line.1126"></a> |
| <span class="sourceLineNo">1127</span> ServerStateNode serverNode, List<RegionStateTransition> transitionList) throws IOException {<a name="line.1127"></a> |
| <span class="sourceLineNo">1128</span> for (RegionStateTransition transition : transitionList) {<a name="line.1128"></a> |
| <span class="sourceLineNo">1129</span> switch (transition.getTransitionCode()) {<a name="line.1129"></a> |
| <span class="sourceLineNo">1130</span> case OPENED:<a name="line.1130"></a> |
| <span class="sourceLineNo">1131</span> case FAILED_OPEN:<a name="line.1131"></a> |
| <span class="sourceLineNo">1132</span> case CLOSED:<a name="line.1132"></a> |
| <span class="sourceLineNo">1133</span> assert transition.getRegionInfoCount() == 1 : transition;<a name="line.1133"></a> |
| <span class="sourceLineNo">1134</span> final RegionInfo hri = ProtobufUtil.toRegionInfo(transition.getRegionInfo(0));<a name="line.1134"></a> |
| <span class="sourceLineNo">1135</span> long procId =<a name="line.1135"></a> |
| <span class="sourceLineNo">1136</span> transition.getProcIdCount() > 0 ? transition.getProcId(0) : Procedure.NO_PROC_ID;<a name="line.1136"></a> |
| <span class="sourceLineNo">1137</span> updateRegionTransition(serverNode, transition.getTransitionCode(), hri,<a name="line.1137"></a> |
| <span class="sourceLineNo">1138</span> transition.hasOpenSeqNum() ? transition.getOpenSeqNum() : HConstants.NO_SEQNUM, procId);<a name="line.1138"></a> |
| <span class="sourceLineNo">1139</span> break;<a name="line.1139"></a> |
| <span class="sourceLineNo">1140</span> case READY_TO_SPLIT:<a name="line.1140"></a> |
| <span class="sourceLineNo">1141</span> case SPLIT:<a name="line.1141"></a> |
| <span class="sourceLineNo">1142</span> case SPLIT_REVERTED:<a name="line.1142"></a> |
| <span class="sourceLineNo">1143</span> assert transition.getRegionInfoCount() == 3 : transition;<a name="line.1143"></a> |
| <span class="sourceLineNo">1144</span> final RegionInfo parent = ProtobufUtil.toRegionInfo(transition.getRegionInfo(0));<a name="line.1144"></a> |
| <span class="sourceLineNo">1145</span> final RegionInfo splitA = ProtobufUtil.toRegionInfo(transition.getRegionInfo(1));<a name="line.1145"></a> |
| <span class="sourceLineNo">1146</span> final RegionInfo splitB = ProtobufUtil.toRegionInfo(transition.getRegionInfo(2));<a name="line.1146"></a> |
| <span class="sourceLineNo">1147</span> updateRegionSplitTransition(serverNode, transition.getTransitionCode(), parent, splitA,<a name="line.1147"></a> |
| <span class="sourceLineNo">1148</span> splitB);<a name="line.1148"></a> |
| <span class="sourceLineNo">1149</span> break;<a name="line.1149"></a> |
| <span class="sourceLineNo">1150</span> case READY_TO_MERGE:<a name="line.1150"></a> |
| <span class="sourceLineNo">1151</span> case MERGED:<a name="line.1151"></a> |
| <span class="sourceLineNo">1152</span> case MERGE_REVERTED:<a name="line.1152"></a> |
| <span class="sourceLineNo">1153</span> assert transition.getRegionInfoCount() == 3 : transition;<a name="line.1153"></a> |
| <span class="sourceLineNo">1154</span> final RegionInfo merged = ProtobufUtil.toRegionInfo(transition.getRegionInfo(0));<a name="line.1154"></a> |
| <span class="sourceLineNo">1155</span> final RegionInfo mergeA = ProtobufUtil.toRegionInfo(transition.getRegionInfo(1));<a name="line.1155"></a> |
| <span class="sourceLineNo">1156</span> final RegionInfo mergeB = ProtobufUtil.toRegionInfo(transition.getRegionInfo(2));<a name="line.1156"></a> |
| <span class="sourceLineNo">1157</span> updateRegionMergeTransition(serverNode, transition.getTransitionCode(), merged, mergeA,<a name="line.1157"></a> |
| <span class="sourceLineNo">1158</span> mergeB);<a name="line.1158"></a> |
| <span class="sourceLineNo">1159</span> break;<a name="line.1159"></a> |
| <span class="sourceLineNo">1160</span> }<a name="line.1160"></a> |
| <span class="sourceLineNo">1161</span> }<a name="line.1161"></a> |
| <span class="sourceLineNo">1162</span> }<a name="line.1162"></a> |
| <span class="sourceLineNo">1163</span><a name="line.1163"></a> |
| <span class="sourceLineNo">1164</span> public ReportRegionStateTransitionResponse reportRegionStateTransition(<a name="line.1164"></a> |
| <span class="sourceLineNo">1165</span> final ReportRegionStateTransitionRequest req) throws PleaseHoldException {<a name="line.1165"></a> |
| <span class="sourceLineNo">1166</span> ReportRegionStateTransitionResponse.Builder builder =<a name="line.1166"></a> |
| <span class="sourceLineNo">1167</span> ReportRegionStateTransitionResponse.newBuilder();<a name="line.1167"></a> |
| <span class="sourceLineNo">1168</span> ServerName serverName = ProtobufUtil.toServerName(req.getServer());<a name="line.1168"></a> |
| <span class="sourceLineNo">1169</span> ServerStateNode serverNode = regionStates.getServerNode(serverName);<a name="line.1169"></a> |
| <span class="sourceLineNo">1170</span> if (serverNode == null) {<a name="line.1170"></a> |
| <span class="sourceLineNo">1171</span> LOG.warn("No server node for {}", serverName);<a name="line.1171"></a> |
| <span class="sourceLineNo">1172</span> builder.setErrorMessage("No server node for " + serverName);<a name="line.1172"></a> |
| <span class="sourceLineNo">1173</span> return builder.build();<a name="line.1173"></a> |
| <span class="sourceLineNo">1174</span> }<a name="line.1174"></a> |
| <span class="sourceLineNo">1175</span> // here we have to acquire a read lock instead of a simple exclusive lock. This is because that<a name="line.1175"></a> |
| <span class="sourceLineNo">1176</span> // we should not block other reportRegionStateTransition call from the same region server. This<a name="line.1176"></a> |
| <span class="sourceLineNo">1177</span> // is not only about performance, but also to prevent dead lock. Think of the meta region is<a name="line.1177"></a> |
| <span class="sourceLineNo">1178</span> // also on the same region server and you hold the lock which blocks the<a name="line.1178"></a> |
| <span class="sourceLineNo">1179</span> // reportRegionStateTransition for meta, and since meta is not online, you will block inside the<a name="line.1179"></a> |
| <span class="sourceLineNo">1180</span> // lock protection to wait for meta online...<a name="line.1180"></a> |
| <span class="sourceLineNo">1181</span> serverNode.readLock().lock();<a name="line.1181"></a> |
| <span class="sourceLineNo">1182</span> try {<a name="line.1182"></a> |
| <span class="sourceLineNo">1183</span> // we only accept reportRegionStateTransition if the region server is online, see the comment<a name="line.1183"></a> |
| <span class="sourceLineNo">1184</span> // above in submitServerCrash method and HBASE-21508 for more details.<a name="line.1184"></a> |
| <span class="sourceLineNo">1185</span> if (serverNode.isInState(ServerState.ONLINE)) {<a name="line.1185"></a> |
| <span class="sourceLineNo">1186</span> try {<a name="line.1186"></a> |
| <span class="sourceLineNo">1187</span> reportRegionStateTransition(builder, serverNode, req.getTransitionList());<a name="line.1187"></a> |
| <span class="sourceLineNo">1188</span> } catch (PleaseHoldException e) {<a name="line.1188"></a> |
| <span class="sourceLineNo">1189</span> LOG.trace("Failed transition ", e);<a name="line.1189"></a> |
| <span class="sourceLineNo">1190</span> throw e;<a name="line.1190"></a> |
| <span class="sourceLineNo">1191</span> } catch (UnsupportedOperationException | IOException e) {<a name="line.1191"></a> |
| <span class="sourceLineNo">1192</span> // TODO: at the moment we have a single error message and the RS will abort<a name="line.1192"></a> |
| <span class="sourceLineNo">1193</span> // if the master says that one of the region transitions failed.<a name="line.1193"></a> |
| <span class="sourceLineNo">1194</span> LOG.warn("Failed transition", e);<a name="line.1194"></a> |
| <span class="sourceLineNo">1195</span> builder.setErrorMessage("Failed transition " + e.getMessage());<a name="line.1195"></a> |
| <span class="sourceLineNo">1196</span> }<a name="line.1196"></a> |
| <span class="sourceLineNo">1197</span> } else {<a name="line.1197"></a> |
| <span class="sourceLineNo">1198</span> LOG.warn("The region server {} is already dead, skip reportRegionStateTransition call",<a name="line.1198"></a> |
| <span class="sourceLineNo">1199</span> serverName);<a name="line.1199"></a> |
| <span class="sourceLineNo">1200</span> builder.setErrorMessage("You are dead");<a name="line.1200"></a> |
| <span class="sourceLineNo">1201</span> }<a name="line.1201"></a> |
| <span class="sourceLineNo">1202</span> } finally {<a name="line.1202"></a> |
| <span class="sourceLineNo">1203</span> serverNode.readLock().unlock();<a name="line.1203"></a> |
| <span class="sourceLineNo">1204</span> }<a name="line.1204"></a> |
| <span class="sourceLineNo">1205</span><a name="line.1205"></a> |
| <span class="sourceLineNo">1206</span> return builder.build();<a name="line.1206"></a> |
| <span class="sourceLineNo">1207</span> }<a name="line.1207"></a> |
| <span class="sourceLineNo">1208</span> /** Passes a transition reported by a RegionServer to the procedure attached to the region. */<a name="line.1208"></a> |
| <span class="sourceLineNo">1209</span> private void updateRegionTransition(ServerStateNode serverNode, TransitionCode state,<a name="line.1209"></a> |
| <span class="sourceLineNo">1210</span> RegionInfo regionInfo, long seqId, long procId) throws IOException {<a name="line.1210"></a> |
| <span class="sourceLineNo">1211</span> checkMetaLoaded(regionInfo);<a name="line.1211"></a> |
| <span class="sourceLineNo">1212</span><a name="line.1212"></a> |
| <span class="sourceLineNo">1213</span> RegionStateNode regionNode = regionStates.getRegionStateNode(regionInfo);<a name="line.1213"></a> |
| <span class="sourceLineNo">1214</span> if (regionNode == null) {<a name="line.1214"></a> |
| <span class="sourceLineNo">1215</span> // The table/region is gone, possibly because of a delete, split or merge.<a name="line.1215"></a> |
| <span class="sourceLineNo">1216</span> throw new UnexpectedStateException(String.format(<a name="line.1216"></a> |
| <span class="sourceLineNo">1217</span> "Server %s was trying to transition region %s to %s. but Region is not known.",<a name="line.1217"></a> |
| <span class="sourceLineNo">1218</span> serverNode.getServerName(), regionInfo, state));<a name="line.1218"></a> |
| <span class="sourceLineNo">1219</span> }<a name="line.1219"></a> |
| <span class="sourceLineNo">1220</span> LOG.trace("Update region transition serverName={} region={} regionState={}",<a name="line.1220"></a> |
| <span class="sourceLineNo">1221</span> serverNode.getServerName(), regionNode, state);<a name="line.1221"></a> |
| <span class="sourceLineNo">1222</span><a name="line.1222"></a> |
| <span class="sourceLineNo">1223</span> regionNode.lock();<a name="line.1223"></a> |
| <span class="sourceLineNo">1224</span> try {<a name="line.1224"></a> |
| <span class="sourceLineNo">1225</span> if (!reportTransition(regionNode, serverNode, state, seqId, procId)) {<a name="line.1225"></a> |
| <span class="sourceLineNo">1226</span> // Expected for CLOSED during cluster shutdown; log at INFO then, avoiding WARNs such as:<a name="line.1226"></a> |
| <span class="sourceLineNo">1227</span> // 2018-08-13 10:45:10,551 WARN ...AssignmentManager: No matching procedure found for<a name="line.1227"></a> |
| <span class="sourceLineNo">1228</span> // rit=OPEN, location=ve0538.halxg.cloudera.com,16020,1533493000958,<a name="line.1228"></a> |
| <span class="sourceLineNo">1229</span> // table=IntegrationTestBigLinkedList, region=65ab289e2fc1530df65f6c3d7cde7aa5 transition<a name="line.1229"></a> |
| <span class="sourceLineNo">1230</span> // to CLOSED<a name="line.1230"></a> |
| <span class="sourceLineNo">1231</span> // These happen because on cluster shutdown, we currently let the RegionServers close<a name="line.1231"></a> |
| <span class="sourceLineNo">1232</span> // regions. This is the only time that region close is not run by the Master (so the cluster<a name="line.1232"></a> |
| <span class="sourceLineNo">1233</span> // goes down fast). Consider changing it so that the Master runs all shutdowns.<a name="line.1233"></a> |
| <span class="sourceLineNo">1234</span> if (<a name="line.1234"></a> |
| <span class="sourceLineNo">1235</span> this.master.getServerManager().isClusterShutdown() && state.equals(TransitionCode.CLOSED)<a name="line.1235"></a> |
| <span class="sourceLineNo">1236</span> ) {<a name="line.1236"></a> |
| <span class="sourceLineNo">1237</span> LOG.info("RegionServer {} {}", state, regionNode.getRegionInfo().getEncodedName());<a name="line.1237"></a> |
| <span class="sourceLineNo">1238</span> } else {<a name="line.1238"></a> |
| <span class="sourceLineNo">1239</span> LOG.warn("No matching procedure found for {} transition on {} to {}",<a name="line.1239"></a> |
| <span class="sourceLineNo">1240</span> serverNode.getServerName(), regionNode, state);<a name="line.1240"></a> |
| <span class="sourceLineNo">1241</span> }<a name="line.1241"></a> |
| <span class="sourceLineNo">1242</span> }<a name="line.1242"></a> |
| <span class="sourceLineNo">1243</span> } finally {<a name="line.1243"></a> |
| <span class="sourceLineNo">1244</span> regionNode.unlock();<a name="line.1244"></a> |
| <span class="sourceLineNo">1245</span> }<a name="line.1245"></a> |
| <span class="sourceLineNo">1246</span> }<a name="line.1246"></a> |
| <span class="sourceLineNo">1247</span> /** Forwards the report to the region's procedure; returns false if none is attached. */<a name="line.1247"></a> |
| <span class="sourceLineNo">1248</span> private boolean reportTransition(RegionStateNode regionNode, ServerStateNode serverNode,<a name="line.1248"></a> |
| <span class="sourceLineNo">1249</span> TransitionCode state, long seqId, long procId) throws IOException {<a name="line.1249"></a> |
| <span class="sourceLineNo">1250</span> final ServerName reporter = serverNode.getServerName();<a name="line.1250"></a> |
| <span class="sourceLineNo">1251</span> final TransitRegionStateProcedure trsp = regionNode.getProcedure();<a name="line.1251"></a> |
| <span class="sourceLineNo">1252</span> if (trsp != null) {<a name="line.1252"></a> |
| <span class="sourceLineNo">1253</span> MasterProcedureEnv env = master.getMasterProcedureExecutor().getEnvironment();<a name="line.1253"></a> |
| <span class="sourceLineNo">1254</span> trsp.reportTransition(env, regionNode, reporter, state, seqId, procId);<a name="line.1254"></a> |
| <span class="sourceLineNo">1255</span> return true;<a name="line.1255"></a> |
| <span class="sourceLineNo">1256</span> }<a name="line.1256"></a> |
| <span class="sourceLineNo">1257</span> return false;<a name="line.1257"></a> |
| <span class="sourceLineNo">1258</span> }<a name="line.1258"></a> |
| <span class="sourceLineNo">1259</span> /** Handles a READY_TO_SPLIT report by submitting a split procedure for an open parent. */<a name="line.1259"></a> |
| <span class="sourceLineNo">1260</span> private void updateRegionSplitTransition(final ServerStateNode serverNode,<a name="line.1260"></a> |
| <span class="sourceLineNo">1261</span> final TransitionCode state, final RegionInfo parent, final RegionInfo hriA,<a name="line.1261"></a> |
| <span class="sourceLineNo">1262</span> final RegionInfo hriB) throws IOException {<a name="line.1262"></a> |
| <span class="sourceLineNo">1263</span> checkMetaLoaded(parent);<a name="line.1263"></a> |
| <span class="sourceLineNo">1264</span><a name="line.1264"></a> |
| <span class="sourceLineNo">1265</span> if (state != TransitionCode.READY_TO_SPLIT) {<a name="line.1265"></a> |
| <span class="sourceLineNo">1266</span> throw new UnexpectedStateException(<a name="line.1266"></a> |
| <span class="sourceLineNo">1267</span> "unsupported split regionState=" + state + " for parent region " + parent<a name="line.1267"></a> |
| <span class="sourceLineNo">1268</span> + " maybe an old RS (< 2.0) had the operation in progress");<a name="line.1268"></a> |
| <span class="sourceLineNo">1269</span> }<a name="line.1269"></a> |
| <span class="sourceLineNo">1270</span><a name="line.1270"></a> |
| <span class="sourceLineNo">1271</span> // Sanity check on the request: hriA must end exactly where hriB starts.<a name="line.1271"></a> |
| <span class="sourceLineNo">1272</span> if (!Bytes.equals(hriA.getEndKey(), hriB.getStartKey())) {<a name="line.1272"></a> |
| <span class="sourceLineNo">1273</span> throw new UnsupportedOperationException("unsupported split request with bad keys: parent="<a name="line.1273"></a> |
| <span class="sourceLineNo">1274</span> + parent + " hriA=" + hriA + " hriB=" + hriB);<a name="line.1274"></a> |
| <span class="sourceLineNo">1275</span> }<a name="line.1275"></a> |
| <span class="sourceLineNo">1276</span><a name="line.1276"></a> |
| <span class="sourceLineNo">1277</span> if (!master.isSplitOrMergeEnabled(MasterSwitchType.SPLIT)) {<a name="line.1277"></a> |
| <span class="sourceLineNo">1278</span> LOG.warn("Split switch is off! skip split of {}", parent);<a name="line.1278"></a> |
| <span class="sourceLineNo">1279</span> throw new DoNotRetryIOException(<a name="line.1279"></a> |
| <span class="sourceLineNo">1280</span> "Split region " + parent.getRegionNameAsString() + " failed due to split switch off");<a name="line.1280"></a> |
| <span class="sourceLineNo">1281</span> }<a name="line.1281"></a> |
| <span class="sourceLineNo">1282</span><a name="line.1282"></a> |
| <span class="sourceLineNo">1283</span> // Submit the Split procedure; the split key is the start key of the second daughter.<a name="line.1283"></a> |
| <span class="sourceLineNo">1284</span> final byte[] splitKey = hriB.getStartKey();<a name="line.1284"></a> |
| <span class="sourceLineNo">1285</span> if (LOG.isDebugEnabled()) {<a name="line.1285"></a> |
| <span class="sourceLineNo">1286</span> LOG.debug("Split request from {}, parent={}, splitKey={}", serverNode.getServerName(), parent,<a name="line.1286"></a> |
| <span class="sourceLineNo">1287</span> Bytes.toStringBinary(splitKey));<a name="line.1287"></a> |
| <span class="sourceLineNo">1288</span> }<a name="line.1288"></a> |
| <span class="sourceLineNo">1289</span> // Processing this report happens asynchronously from other activities which can mutate<a name="line.1289"></a> |
| <span class="sourceLineNo">1290</span> // the region state. For example, a split procedure may already be running for this parent.<a name="line.1290"></a> |
| <span class="sourceLineNo">1291</span> // A split procedure cannot succeed if the parent region is no longer open, so we can<a name="line.1291"></a> |
| <span class="sourceLineNo">1292</span> // ignore it in that case.<a name="line.1292"></a> |
| <span class="sourceLineNo">1293</span> // Note that submitting more than one split procedure for a given region is<a name="line.1293"></a> |
| <span class="sourceLineNo">1294</span> // harmless -- the split is fenced in the procedure handling -- but it would be noisy in<a name="line.1294"></a> |
| <span class="sourceLineNo">1295</span> // the logs. Only one procedure can succeed. The other procedure(s) would abort during<a name="line.1295"></a> |
| <span class="sourceLineNo">1296</span> // initialization and report failure with WARN level logging.<a name="line.1296"></a> |
| <span class="sourceLineNo">1297</span> RegionState parentState = regionStates.getRegionState(parent);<a name="line.1297"></a> |
| <span class="sourceLineNo">1298</span> if (parentState != null && parentState.isOpened()) {<a name="line.1298"></a> |
| <span class="sourceLineNo">1299</span> master.getMasterProcedureExecutor().submitProcedure(createSplitProcedure(parent, splitKey));<a name="line.1299"></a> |
| <span class="sourceLineNo">1300</span> } else {<a name="line.1300"></a> |
| <span class="sourceLineNo">1301</span> LOG.info("Ignoring split request from {}, parent={} because parent is unknown or not open",<a name="line.1301"></a> |
| <span class="sourceLineNo">1302</span> serverNode.getServerName(), parent);<a name="line.1302"></a> |
| <span class="sourceLineNo">1303</span> return;<a name="line.1303"></a> |
| <span class="sourceLineNo">1304</span> }<a name="line.1304"></a> |
| <span class="sourceLineNo">1305</span><a name="line.1305"></a> |
| <span class="sourceLineNo">1306</span> // If the RS is < 2.0 throw an exception to abort the operation, we are handling the split<a name="line.1306"></a> |
| <span class="sourceLineNo">1307</span> if (master.getServerManager().getVersionNumber(serverNode.getServerName()) < 0x0200000) {<a name="line.1307"></a> |
| <span class="sourceLineNo">1308</span> throw new UnsupportedOperationException(<a name="line.1308"></a> |
| <span class="sourceLineNo">1309</span> String.format("Split handled by the master: parent=%s hriA=%s hriB=%s",<a name="line.1309"></a> |
| <span class="sourceLineNo">1310</span> parent.getShortNameToLog(), hriA, hriB));<a name="line.1310"></a> |
| <span class="sourceLineNo">1311</span> }<a name="line.1311"></a> |
| <span class="sourceLineNo">1312</span> }<a name="line.1312"></a> |
| <span class="sourceLineNo">1313</span> /** Handles a READY_TO_MERGE report by submitting a merge procedure for hriA and hriB. */<a name="line.1313"></a> |
| <span class="sourceLineNo">1314</span> private void updateRegionMergeTransition(final ServerStateNode serverNode,<a name="line.1314"></a> |
| <span class="sourceLineNo">1315</span> final TransitionCode state, final RegionInfo merged, final RegionInfo hriA,<a name="line.1315"></a> |
| <span class="sourceLineNo">1316</span> final RegionInfo hriB) throws IOException {<a name="line.1316"></a> |
| <span class="sourceLineNo">1317</span> checkMetaLoaded(merged);<a name="line.1317"></a> |
| <span class="sourceLineNo">1318</span><a name="line.1318"></a> |
| <span class="sourceLineNo">1319</span> if (state != TransitionCode.READY_TO_MERGE) {<a name="line.1319"></a> |
| <span class="sourceLineNo">1320</span> throw new UnexpectedStateException(<a name="line.1320"></a> |
| <span class="sourceLineNo">1321</span> "Unsupported merge regionState=" + state + " for regionA=" + hriA + " regionB=" + hriB<a name="line.1321"></a> |
| <span class="sourceLineNo">1322</span> + " merged=" + merged + " maybe an old RS (< 2.0) had the operation in progress");<a name="line.1322"></a> |
| <span class="sourceLineNo">1323</span> }<a name="line.1323"></a> |
| <span class="sourceLineNo">1324</span><a name="line.1324"></a> |
| <span class="sourceLineNo">1325</span> if (!master.isSplitOrMergeEnabled(MasterSwitchType.MERGE)) {<a name="line.1325"></a> |
| <span class="sourceLineNo">1326</span> LOG.warn("Merge switch is off! skip merge of regionA={} regionB={}", hriA, hriB);<a name="line.1326"></a> |
| <span class="sourceLineNo">1327</span> throw new DoNotRetryIOException(<a name="line.1327"></a> |
| <span class="sourceLineNo">1328</span> "Merge of regionA=" + hriA + " regionB=" + hriB + " failed because merge switch is off");<a name="line.1328"></a> |
| <span class="sourceLineNo">1329</span> }<a name="line.1329"></a> |
| <span class="sourceLineNo">1330</span><a name="line.1330"></a> |
| <span class="sourceLineNo">1331</span> // Submit the Merge procedure<a name="line.1331"></a> |
| <span class="sourceLineNo">1332</span> // RS= is the reporting RegionServer's name, not the merged region.<a name="line.1332"></a> |
| <span class="sourceLineNo">1333</span> LOG.debug("Handling merge request from RS={}, merged={}", serverNode.getServerName(),<a name="line.1333"></a> |
| <span class="sourceLineNo">1334</span> merged);<a name="line.1334"></a> |
| <span class="sourceLineNo">1335</span> master.getMasterProcedureExecutor().submitProcedure(createMergeProcedure(hriA, hriB));<a name="line.1335"></a> |
| <span class="sourceLineNo">1336</span><a name="line.1336"></a> |
| <span class="sourceLineNo">1337</span> // If the RS is < 2.0 throw an exception to abort the operation, we are handling the merge<a name="line.1337"></a> |
| <span class="sourceLineNo">1338</span> if (master.getServerManager().getVersionNumber(serverNode.getServerName()) < 0x0200000) {<a name="line.1338"></a> |
| <span class="sourceLineNo">1339</span> throw new UnsupportedOperationException(<a name="line.1339"></a> |
| <span class="sourceLineNo">1340</span> String.format("Merge not handled yet: regionState=%s merged=%s hriA=%s hriB=%s", state,<a name="line.1340"></a> |
| <span class="sourceLineNo">1341</span> merged, hriA, hriB));<a name="line.1341"></a> |
| <span class="sourceLineNo">1342</span> }<a name="line.1342"></a> |
| <span class="sourceLineNo">1343</span> }<a name="line.1343"></a> |
| <span class="sourceLineNo">1344</span><a name="line.1344"></a> |
| <span class="sourceLineNo">1345</span> // ============================================================================================<a name="line.1345"></a> |
| <span class="sourceLineNo">1346</span> // RS Status update (report online regions) helpers<a name="line.1346"></a> |
| <span class="sourceLineNo">1347</span> // ============================================================================================<a name="line.1347"></a> |
| <span class="sourceLineNo">1348</span> /**<a name="line.1348"></a> |
| <span class="sourceLineNo">1349</span> * The master calls this method when the RS sends the regionServerReport(). The report<a name="line.1349"></a> |
| <span class="sourceLineNo">1350</span> * contains the "online regions". This method checks the online regions against the<a name="line.1350"></a> |
| <span class="sourceLineNo">1351</span> * in-memory state of the AM, and we log a warn message if there is a mismatch. This is<a name="line.1351"></a> |
| <span class="sourceLineNo">1352</span> * because there is no fencing between the reportRegionStateTransition method and the<a name="line.1352"></a> |
| <span class="sourceLineNo">1353</span> * regionServerReport method, so there could be a race that introduces inconsistency here,<a name="line.1353"></a> |
| <span class="sourceLineNo">1354</span> * but actually there is no problem.<a name="line.1354"></a> |
| <span class="sourceLineNo">1355</span> * <p/><a name="line.1355"></a> |
| <span class="sourceLineNo">1356</span> * Please see HBASE-21421 and HBASE-21463 for more details.<a name="line.1356"></a> |
| <span class="sourceLineNo">1357</span> */<a name="line.1357"></a> |
| <span class="sourceLineNo">1358</span> public void reportOnlineRegions(ServerName serverName, Set<byte[]> regionNames) {<a name="line.1358"></a> |
| <span class="sourceLineNo">1359</span> if (!isRunning()) {<a name="line.1359"></a> |
| <span class="sourceLineNo">1360</span> return;<a name="line.1360"></a> |
| <span class="sourceLineNo">1361</span> }<a name="line.1361"></a> |
| <span class="sourceLineNo">1362</span> if (LOG.isTraceEnabled()) {<a name="line.1362"></a> |
| <span class="sourceLineNo">1363</span> LOG.trace("ReportOnlineRegions {} regionCount={}, metaLoaded={} {}", serverName,<a name="line.1363"></a> |
| <span class="sourceLineNo">1364</span> regionNames.size(), isMetaLoaded(),<a name="line.1364"></a> |
| <span class="sourceLineNo">1365</span> regionNames.stream().map(Bytes::toStringBinary).collect(Collectors.toList()));<a name="line.1365"></a> |
| <span class="sourceLineNo">1366</span> }<a name="line.1366"></a> |
| <span class="sourceLineNo">1367</span><a name="line.1367"></a> |
| <span class="sourceLineNo">1368</span> ServerStateNode serverNode = regionStates.getServerNode(serverName);<a name="line.1368"></a> |
| <span class="sourceLineNo">1369</span> if (serverNode == null) {<a name="line.1369"></a> |
| <span class="sourceLineNo">1370</span> LOG.warn("Got a report from server {} where its server node is null", serverName);<a name="line.1370"></a> |
| <span class="sourceLineNo">1371</span> return;<a name="line.1371"></a> |
| <span class="sourceLineNo">1372</span> }<a name="line.1372"></a> |
| <span class="sourceLineNo">1373</span> serverNode.readLock().lock();<a name="line.1373"></a> |
| <span class="sourceLineNo">1374</span> try {<a name="line.1374"></a> |
| <span class="sourceLineNo">1375</span> if (!serverNode.isInState(ServerState.ONLINE)) {<a name="line.1375"></a> |
| <span class="sourceLineNo">1376</span> LOG.warn("Got a report from a server result in state {}", serverNode);<a name="line.1376"></a> |
| <span class="sourceLineNo">1377</span> return;<a name="line.1377"></a> |
| <span class="sourceLineNo">1378</span> }<a name="line.1378"></a> |
| <span class="sourceLineNo">1379</span> } finally {<a name="line.1379"></a> |
| <span class="sourceLineNo">1380</span> serverNode.readLock().unlock();<a name="line.1380"></a> |
| <span class="sourceLineNo">1381</span> }<a name="line.1381"></a> |
| <span class="sourceLineNo">1382</span><a name="line.1382"></a> |
| <span class="sourceLineNo">1383</span> // Remember the online regions reported by this region server in memory.<a name="line.1383"></a> |
| <span class="sourceLineNo">1384</span> synchronized (rsReports) {<a name="line.1384"></a> |
| <span class="sourceLineNo">1385</span> rsReports.put(serverName, regionNames);<a name="line.1385"></a> |
| <span class="sourceLineNo">1386</span> }<a name="line.1386"></a> |
| <span class="sourceLineNo">1387</span><a name="line.1387"></a> |
| <span class="sourceLineNo">1388</span> if (regionNames.isEmpty()) {<a name="line.1388"></a> |
| <span class="sourceLineNo">1389</span> // Nothing to check if the server reports no regions.<a name="line.1389"></a> |
| <span class="sourceLineNo">1390</span> LOG.trace("no online region found on {}", serverName);<a name="line.1390"></a> |
| <span class="sourceLineNo">1391</span> return;<a name="line.1391"></a> |
| <span class="sourceLineNo">1392</span> }<a name="line.1392"></a> |
| <span class="sourceLineNo">1393</span> if (!isMetaLoaded()) {<a name="line.1393"></a> |
| <span class="sourceLineNo">1394</span> // We are still starting up (meta is not loaded yet), so skip the check.<a name="line.1394"></a> |
| <span class="sourceLineNo">1395</span> return;<a name="line.1395"></a> |
| <span class="sourceLineNo">1396</span> }<a name="line.1396"></a> |
| <span class="sourceLineNo">1397</span> // The heartbeat tells us which regions are on the region server; check their state.<a name="line.1397"></a> |
| <span class="sourceLineNo">1398</span> checkOnlineRegionsReport(serverNode, regionNames);<a name="line.1398"></a> |
| <span class="sourceLineNo">1399</span> }<a name="line.1399"></a> |
| <span class="sourceLineNo">1400</span><a name="line.1400"></a> |
| <span class="sourceLineNo">1401</span> /**<a name="line.1401"></a> |
| <span class="sourceLineNo">1402</span> * Close <code>regionName</code> on <code>sn</code> silently and immediately without using a<a name="line.1402"></a> |
| <span class="sourceLineNo">1403</span> * Procedure or going via hbase:meta. For the case where a RegionServer's hosting of a Region<a name="line.1403"></a> |
| <span class="sourceLineNo">1404</span> * is not aligned w/ the Master's accounting of Region state. This is for cleaning up an error<a name="line.1404"></a> |
| <span class="sourceLineNo">1405</span> * in accounting. Best effort: any failure is logged at ERROR and not propagated.<a name="line.1405"></a> |
| <span class="sourceLineNo">1406</span> */<a name="line.1406"></a> |
| <span class="sourceLineNo">1407</span> private void closeRegionSilently(ServerName sn, byte[] regionName) {<a name="line.1407"></a> |
| <span class="sourceLineNo">1408</span> try {<a name="line.1408"></a> |
| <span class="sourceLineNo">1409</span> RegionInfo ri = CatalogFamilyFormat.parseRegionInfoFromRegionName(regionName);<a name="line.1409"></a> |
| <span class="sourceLineNo">1410</span> // Pass -1 for the timeout, which means: do not wait.<a name="line.1410"></a> |
| <span class="sourceLineNo">1411</span> ServerManager.closeRegionSilentlyAndWait(this.master.getAsyncClusterConnection(), sn, ri, -1);<a name="line.1411"></a> |
| <span class="sourceLineNo">1412</span> } catch (Exception e) {<a name="line.1412"></a> |
| <span class="sourceLineNo">1413</span> LOG.error("Failed trying to close {} on {}", Bytes.toStringBinary(regionName), sn, e);<a name="line.1413"></a> |
| <span class="sourceLineNo">1414</span> }<a name="line.1414"></a> |
| <span class="sourceLineNo">1415</span> }<a name="line.1415"></a> |
| <span class="sourceLineNo">1416</span><a name="line.1416"></a> |
| <span class="sourceLineNo">1417</span> /**<a name="line.1417"></a> |
| <span class="sourceLineNo">1418</span> * Check that what the RegionServer reports aligns with the Master's image. If they disagree, we<a name="line.1418"></a> |
| <span class="sourceLineNo">1419</span> * will tell the RegionServer to expediently close a Region we do not think it should have.<a name="line.1419"></a> |
| <span class="sourceLineNo">1420</span> */<a name="line.1420"></a> |
| <span class="sourceLineNo">1421</span> private void checkOnlineRegionsReport(ServerStateNode serverNode, Set<byte[]> regionNames) {<a name="line.1421"></a> |
| <span class="sourceLineNo">1422</span> ServerName serverName = serverNode.getServerName();<a name="line.1422"></a> |
| <span class="sourceLineNo">1423</span> for (byte[] regionName : regionNames) {<a name="line.1423"></a> |
| <span class="sourceLineNo">1424</span> if (!isRunning()) {<a name="line.1424"></a> |
| <span class="sourceLineNo">1425</span> return;<a name="line.1425"></a> |
| <span class="sourceLineNo">1426</span> }<a name="line.1426"></a> |
| <span class="sourceLineNo">1427</span> RegionStateNode regionNode = regionStates.getRegionStateNodeFromName(regionName);<a name="line.1427"></a> |
| <span class="sourceLineNo">1428</span> if (regionNode == null) {<a name="line.1428"></a> |
| <span class="sourceLineNo">1429</span> String regionNameAsStr = Bytes.toStringBinary(regionName);<a name="line.1429"></a> |
| <span class="sourceLineNo">1430</span> LOG.warn("No RegionStateNode for {} but reported as up on {}; closing...", regionNameAsStr,<a name="line.1430"></a> |
| <span class="sourceLineNo">1431</span> serverName);<a name="line.1431"></a> |
| <span class="sourceLineNo">1432</span> closeRegionSilently(serverNode.getServerName(), regionName);<a name="line.1432"></a> |
| <span class="sourceLineNo">1433</span> continue;<a name="line.1433"></a> |
| <span class="sourceLineNo">1434</span> }<a name="line.1434"></a> |
| <span class="sourceLineNo">1435</span> final long lag = 1000;<a name="line.1435"></a> |
| <span class="sourceLineNo">1436</span> // This is just a fallback check designed to identify unexpected data inconsistencies, so we<a name="line.1436"></a> |
| <span class="sourceLineNo">1437</span> // use tryLock to attempt to acquire the lock, and if the lock cannot be acquired, we skip the<a name="line.1437"></a> |
| <span class="sourceLineNo">1438</span> // check. This will not cause any additional problems and also prevents the regionServerReport<a name="line.1438"></a> |
| <span class="sourceLineNo">1439</span> // call from being stuck for too long, which may cause deadlock on region assignment.<a name="line.1439"></a> |
| <span class="sourceLineNo">1440</span> if (regionNode.tryLock()) {<a name="line.1440"></a> |
| <span class="sourceLineNo">1441</span> try {<a name="line.1441"></a> |
| <span class="sourceLineNo">1442</span> long diff = EnvironmentEdgeManager.currentTime() - regionNode.getLastUpdate();<a name="line.1442"></a> |
| <span class="sourceLineNo">1443</span> if (regionNode.isInState(State.OPENING, State.OPEN)) {<a name="line.1443"></a> |
| <span class="sourceLineNo">1444</span> // This is possible when a region server has just closed a region: the region server<a name="line.1444"></a> |
| <span class="sourceLineNo">1445</span> // report was generated before the close but arrived after it. Require that more than<a name="line.1445"></a> |
| <span class="sourceLineNo">1446</span> // 'lag' ms have passed since the region node's last update, so that there are fewer<a name="line.1446"></a> |
| <span class="sourceLineNo">1447</span> // false alarms.<a name="line.1447"></a> |
| <span class="sourceLineNo">1448</span> if (!regionNode.getRegionLocation().equals(serverName) && diff > lag) {<a name="line.1448"></a> |
| <span class="sourceLineNo">1449</span> LOG.warn("Reporting {} server does not match {} (time since last "<a name="line.1449"></a> |
| <span class="sourceLineNo">1450</span> + "update={}ms); closing...", serverName, regionNode, diff);<a name="line.1450"></a> |
| <span class="sourceLineNo">1451</span> closeRegionSilently(serverNode.getServerName(), regionName);<a name="line.1451"></a> |
| <span class="sourceLineNo">1452</span> }<a name="line.1452"></a> |
| <span class="sourceLineNo">1453</span> } else if (!regionNode.isInState(State.CLOSING, State.SPLITTING)) {<a name="line.1453"></a> |
| <span class="sourceLineNo">1454</span> // We can get a report for a region that is CLOSED or SPLIT because a heartbeat<a name="line.1454"></a> |
| <span class="sourceLineNo">1455</span> // came in at about the same time as a region transition. Require some elapsed<a name="line.1455"></a> |
| <span class="sourceLineNo">1456</span> // time so that there are fewer false alarms; here we only log, we do not close.<a name="line.1456"></a> |
| <span class="sourceLineNo">1457</span> if (diff > lag) {<a name="line.1457"></a> |
| <span class="sourceLineNo">1458</span> LOG.warn("Reporting {} state does not match {} (time since last update={}ms)",<a name="line.1458"></a> |
| <span class="sourceLineNo">1459</span> serverName, regionNode, diff);<a name="line.1459"></a> |
| <span class="sourceLineNo">1460</span> }<a name="line.1460"></a> |
| <span class="sourceLineNo">1461</span> }<a name="line.1461"></a> |
| <span class="sourceLineNo">1462</span> } finally {<a name="line.1462"></a> |
| <span class="sourceLineNo">1463</span> regionNode.unlock();<a name="line.1463"></a> |
| <span class="sourceLineNo">1464</span> }<a name="line.1464"></a> |
| <span class="sourceLineNo">1465</span> } else {<a name="line.1465"></a> |
| <span class="sourceLineNo">1466</span> LOG.warn(<a name="line.1466"></a> |
| <span class="sourceLineNo">1467</span> "Unable to acquire lock for regionNode {}. It is likely that another thread is currently holding the lock. To avoid deadlock, skip execution for now.",<a name="line.1467"></a> |
| <span class="sourceLineNo">1468</span> regionNode);<a name="line.1468"></a> |
| <span class="sourceLineNo">1469</span> }<a name="line.1469"></a> |
| <span class="sourceLineNo">1470</span> }<a name="line.1470"></a> |
| <span class="sourceLineNo">1471</span> }<a name="line.1471"></a> |
| <span class="sourceLineNo">1472</span><a name="line.1472"></a> |
| <span class="sourceLineNo">1473</span> // ============================================================================================<a name="line.1473"></a> |
| <span class="sourceLineNo">1474</span> // RIT chore<a name="line.1474"></a> |
| <span class="sourceLineNo">1475</span> // ============================================================================================<a name="line.1475"></a> |
| <span class="sourceLineNo">1476</span> private static class RegionInTransitionChore extends ProcedureInMemoryChore<MasterProcedureEnv> {<a name="line.1476"></a> |
| <span class="sourceLineNo">1477</span> public RegionInTransitionChore(final int timeoutMsec) {<a name="line.1477"></a> |
| <span class="sourceLineNo">1478</span> super(timeoutMsec);<a name="line.1478"></a> |
| <span class="sourceLineNo">1479</span> }<a name="line.1479"></a> |
| <span class="sourceLineNo">1480</span><a name="line.1480"></a> |
| <span class="sourceLineNo">1481</span> @Override<a name="line.1481"></a> |
| <span class="sourceLineNo">1482</span> protected void periodicExecute(final MasterProcedureEnv env) {<a name="line.1482"></a> |
| <span class="sourceLineNo">1483</span> final AssignmentManager assignmentManager = env.getAssignmentManager();<a name="line.1483"></a> |
| <span class="sourceLineNo">1484</span> final RegionInTransitionStat stat = assignmentManager.computeRegionInTransitionStat();<a name="line.1484"></a> |
| <span class="sourceLineNo">1485</span> // Let the AM handle each region the stat reports as being over the threshold.<a name="line.1485"></a> |
| <span class="sourceLineNo">1486</span> if (stat.hasRegionsOverThreshold()) {<a name="line.1486"></a> |
| <span class="sourceLineNo">1487</span> for (RegionState overdue : stat.getRegionOverThreshold()) {<a name="line.1487"></a> |
| <span class="sourceLineNo">1488</span> assignmentManager.handleRegionOverStuckWarningThreshold(overdue.getRegion());<a name="line.1488"></a> |
| <span class="sourceLineNo">1489</span> }<a name="line.1489"></a> |
| <span class="sourceLineNo">1490</span> }<a name="line.1490"></a> |
| <span class="sourceLineNo">1491</span><a name="line.1491"></a> |
| <span class="sourceLineNo">1492</span> // Update the regions-in-transition metrics from the same stat.<a name="line.1492"></a> |
| <span class="sourceLineNo">1493</span> assignmentManager.updateRegionsInTransitionMetrics(stat);<a name="line.1493"></a> |
| <span class="sourceLineNo">1494</span> }<a name="line.1494"></a> |
| <span class="sourceLineNo">1495</span> }<a name="line.1495"></a> |
| <span class="sourceLineNo">1496</span><a name="line.1496"></a> |
| <span class="sourceLineNo">1497</span> private static class DeadServerMetricRegionChore<a name="line.1497"></a> |
| <span class="sourceLineNo">1498</span> extends ProcedureInMemoryChore<MasterProcedureEnv> {<a name="line.1498"></a> |
| <span class="sourceLineNo">1499</span> public DeadServerMetricRegionChore(final int timeoutMsec) {<a name="line.1499"></a> |
| <span class="sourceLineNo">1500</span> super(timeoutMsec);<a name="line.1500"></a> |
| <span class="sourceLineNo">1501</span> }<a name="line.1501"></a> |
| <span class="sourceLineNo">1502</span><a name="line.1502"></a> |
| <span class="sourceLineNo">1503</span> @Override<a name="line.1503"></a> |
| <span class="sourceLineNo">1504</span> protected void periodicExecute(final MasterProcedureEnv env) {<a name="line.1504"></a> |
| <span class="sourceLineNo">1505</span> final ServerManager sm = env.getMasterServices().getServerManager();<a name="line.1505"></a> |
| <span class="sourceLineNo">1506</span> final AssignmentManager am = env.getAssignmentManager();<a name="line.1506"></a> |
| <span class="sourceLineNo">1507</span> // To minimize inconsistencies we are not going to snapshot live servers in advance in case<a name="line.1507"></a> |
| <span class="sourceLineNo">1508</span> // new servers are added; OTOH we don't want to add heavy sync for a consistent view since<a name="line.1508"></a> |
| <span class="sourceLineNo">1509</span> // this is for metrics. Instead, we're going to check each region as we go; to avoid making<a name="line.1509"></a> |
| <span class="sourceLineNo">1510</span> // too many checks, we maintain a local list of servers, limiting us to false negatives. If<a name="line.1510"></a> |
| <span class="sourceLineNo">1511</span> // we miss some recently-dead server, we'll just see it next time.<a name="line.1511"></a> |
| <span class="sourceLineNo">1512</span> Set<ServerName> recentlyLiveServers = new HashSet<>();<a name="line.1512"></a> |
| <span class="sourceLineNo">1513</span> int deadRegions = 0, unknownRegions = 0;<a name="line.1513"></a> |
| <span class="sourceLineNo">1514</span> for (RegionStateNode rsn : am.getRegionStates().getRegionStateNodes()) {<a name="line.1514"></a> |
| <span class="sourceLineNo">1515</span> if (rsn.getState() != State.OPEN) {<a name="line.1515"></a> |
| <span class="sourceLineNo">1516</span> continue; // Opportunistic check, should quickly skip RITs, offline tables, etc.<a name="line.1516"></a> |
| <span class="sourceLineNo">1517</span> }<a name="line.1517"></a> |
| <span class="sourceLineNo">1518</span> // Do not need to acquire region state lock as this is only for showing metrics.<a name="line.1518"></a> |
| <span class="sourceLineNo">1519</span> ServerName sn = rsn.getRegionLocation();<a name="line.1519"></a> |
| <span class="sourceLineNo">1520</span> State state = rsn.getState();<a name="line.1520"></a> |
| <span class="sourceLineNo">1521</span> if (state != State.OPEN) {<a name="line.1521"></a> |
| <span class="sourceLineNo">1522</span> continue; // Mostly skipping RITs that are already being taken care of.<a name="line.1522"></a> |
| <span class="sourceLineNo">1523</span> }<a name="line.1523"></a> |
| <span class="sourceLineNo">1524</span> if (sn == null) {<a name="line.1524"></a> |
| <span class="sourceLineNo">1525</span> ++unknownRegions; // Opened on null?<a name="line.1525"></a> |
| <span class="sourceLineNo">1526</span> continue;<a name="line.1526"></a> |
| <span class="sourceLineNo">1527</span> }<a name="line.1527"></a> |
| <span class="sourceLineNo">1528</span> if (recentlyLiveServers.contains(sn)) {<a name="line.1528"></a> |
| <span class="sourceLineNo">1529</span> continue;<a name="line.1529"></a> |
| <span class="sourceLineNo">1530</span> }<a name="line.1530"></a> |
| <span class="sourceLineNo">1531</span> ServerManager.ServerLiveState sls = sm.isServerKnownAndOnline(sn);<a name="line.1531"></a> |
| <span class="sourceLineNo">1532</span> switch (sls) {<a name="line.1532"></a> |
| <span class="sourceLineNo">1533</span> case LIVE:<a name="line.1533"></a> |
| <span class="sourceLineNo">1534</span> recentlyLiveServers.add(sn);<a name="line.1534"></a> |
| <span class="sourceLineNo">1535</span> break;<a name="line.1535"></a> |
| <span class="sourceLineNo">1536</span> case DEAD:<a name="line.1536"></a> |
| <span class="sourceLineNo">1537</span> ++deadRegions;<a name="line.1537"></a> |
| <span class="sourceLineNo">1538</span> break;<a name="line.1538"></a> |
| <span class="sourceLineNo">1539</span> case UNKNOWN:<a name="line.1539"></a> |
| <span class="sourceLineNo">1540</span> ++unknownRegions;<a name="line.1540"></a> |
| <span class="sourceLineNo">1541</span> break;<a name="line.1541"></a> |
| <span class="sourceLineNo">1542</span> default:<a name="line.1542"></a> |
| <span class="sourceLineNo">1543</span> throw new AssertionError("Unexpected " + sls);<a name="line.1543"></a> |
| <span class="sourceLineNo">1544</span> }<a name="line.1544"></a> |
| <span class="sourceLineNo">1545</span> }<a name="line.1545"></a> |
| <span class="sourceLineNo">1546</span> if (deadRegions > 0 || unknownRegions > 0) {<a name="line.1546"></a> |
| <span class="sourceLineNo">1547</span> LOG.info("Found {} OPEN regions on dead servers and {} OPEN regions on unknown servers",<a name="line.1547"></a> |
| <span class="sourceLineNo">1548</span> deadRegions, unknownRegions);<a name="line.1548"></a> |
| <span class="sourceLineNo">1549</span> }<a name="line.1549"></a> |
| <span class="sourceLineNo">1550</span><a name="line.1550"></a> |
| <span class="sourceLineNo">1551</span> am.updateDeadServerRegionMetrics(deadRegions, unknownRegions);<a name="line.1551"></a> |
| <span class="sourceLineNo">1552</span> }<a name="line.1552"></a> |
| <span class="sourceLineNo">1553</span> }<a name="line.1553"></a> |
| <span class="sourceLineNo">1554</span><a name="line.1554"></a> |
| <span class="sourceLineNo">1555</span> public RegionInTransitionStat computeRegionInTransitionStat() {<a name="line.1555"></a> |
| <span class="sourceLineNo">1556</span> final RegionInTransitionStat rit = new RegionInTransitionStat(getConfiguration());<a name="line.1556"></a> |
| <span class="sourceLineNo">1557</span> rit.update(this);<a name="line.1557"></a> |
| <span class="sourceLineNo">1558</span> return rit;<a name="line.1558"></a> |
| <span class="sourceLineNo">1559</span> }<a name="line.1559"></a> |
| <span class="sourceLineNo">1560</span><a name="line.1560"></a> |
| <span class="sourceLineNo">1561</span> public static class RegionInTransitionStat {<a name="line.1561"></a> |
| <span class="sourceLineNo">1562</span> private final int ritThreshold;<a name="line.1562"></a> |
| <span class="sourceLineNo">1563</span><a name="line.1563"></a> |
| <span class="sourceLineNo">1564</span> private HashMap<String, RegionState> ritsOverThreshold = null;<a name="line.1564"></a> |
| <span class="sourceLineNo">1565</span> private long statTimestamp;<a name="line.1565"></a> |
| <span class="sourceLineNo">1566</span> private long oldestRITTime = 0;<a name="line.1566"></a> |
| <span class="sourceLineNo">1567</span> private int totalRITsTwiceThreshold = 0;<a name="line.1567"></a> |
| <span class="sourceLineNo">1568</span> private int totalRITs = 0;<a name="line.1568"></a> |
| <span class="sourceLineNo">1569</span><a name="line.1569"></a> |
| <span class="sourceLineNo">1570</span> public RegionInTransitionStat(final Configuration conf) {<a name="line.1570"></a> |
| <span class="sourceLineNo">1571</span> this.ritThreshold =<a name="line.1571"></a> |
| <span class="sourceLineNo">1572</span> conf.getInt(METRICS_RIT_STUCK_WARNING_THRESHOLD, DEFAULT_RIT_STUCK_WARNING_THRESHOLD);<a name="line.1572"></a> |
| <span class="sourceLineNo">1573</span> }<a name="line.1573"></a> |
| <span class="sourceLineNo">1574</span><a name="line.1574"></a> |
| <span class="sourceLineNo">1575</span> public int getRITThreshold() {<a name="line.1575"></a> |
| <span class="sourceLineNo">1576</span> return ritThreshold;<a name="line.1576"></a> |
| <span class="sourceLineNo">1577</span> }<a name="line.1577"></a> |
| <span class="sourceLineNo">1578</span><a name="line.1578"></a> |
| <span class="sourceLineNo">1579</span> public long getTimestamp() {<a name="line.1579"></a> |
| <span class="sourceLineNo">1580</span> return statTimestamp;<a name="line.1580"></a> |
| <span class="sourceLineNo">1581</span> }<a name="line.1581"></a> |
| <span class="sourceLineNo">1582</span><a name="line.1582"></a> |
| <span class="sourceLineNo">1583</span> public int getTotalRITs() {<a name="line.1583"></a> |
| <span class="sourceLineNo">1584</span> return totalRITs;<a name="line.1584"></a> |
| <span class="sourceLineNo">1585</span> }<a name="line.1585"></a> |
| <span class="sourceLineNo">1586</span><a name="line.1586"></a> |
| <span class="sourceLineNo">1587</span> public long getOldestRITTime() {<a name="line.1587"></a> |
| <span class="sourceLineNo">1588</span> return oldestRITTime;<a name="line.1588"></a> |
| <span class="sourceLineNo">1589</span> }<a name="line.1589"></a> |
| <span class="sourceLineNo">1590</span><a name="line.1590"></a> |
| <span class="sourceLineNo">1591</span> public int getTotalRITsOverThreshold() {<a name="line.1591"></a> |
| <span class="sourceLineNo">1592</span> Map<String, RegionState> m = this.ritsOverThreshold;<a name="line.1592"></a> |
| <span class="sourceLineNo">1593</span> return m != null ? m.size() : 0;<a name="line.1593"></a> |
| <span class="sourceLineNo">1594</span> }<a name="line.1594"></a> |
| <span class="sourceLineNo">1595</span><a name="line.1595"></a> |
| <span class="sourceLineNo">1596</span> public boolean hasRegionsTwiceOverThreshold() {<a name="line.1596"></a> |
| <span class="sourceLineNo">1597</span> return totalRITsTwiceThreshold > 0;<a name="line.1597"></a> |
| <span class="sourceLineNo">1598</span> }<a name="line.1598"></a> |
| <span class="sourceLineNo">1599</span><a name="line.1599"></a> |
| <span class="sourceLineNo">1600</span> public boolean hasRegionsOverThreshold() {<a name="line.1600"></a> |
| <span class="sourceLineNo">1601</span> Map<String, RegionState> m = this.ritsOverThreshold;<a name="line.1601"></a> |
| <span class="sourceLineNo">1602</span> return m != null && !m.isEmpty();<a name="line.1602"></a> |
| <span class="sourceLineNo">1603</span> }<a name="line.1603"></a> |
| <span class="sourceLineNo">1604</span><a name="line.1604"></a> |
| <span class="sourceLineNo">1605</span> public Collection<RegionState> getRegionOverThreshold() {<a name="line.1605"></a> |
| <span class="sourceLineNo">1606</span> Map<String, RegionState> m = this.ritsOverThreshold;<a name="line.1606"></a> |
| <span class="sourceLineNo">1607</span> return m != null ? m.values() : Collections.emptySet();<a name="line.1607"></a> |
| <span class="sourceLineNo">1608</span> }<a name="line.1608"></a> |
| <span class="sourceLineNo">1609</span><a name="line.1609"></a> |
| <span class="sourceLineNo">1610</span> public boolean isRegionOverThreshold(final RegionInfo regionInfo) {<a name="line.1610"></a> |
| <span class="sourceLineNo">1611</span> Map<String, RegionState> m = this.ritsOverThreshold;<a name="line.1611"></a> |
| <span class="sourceLineNo">1612</span> return m != null && m.containsKey(regionInfo.getEncodedName());<a name="line.1612"></a> |
| <span class="sourceLineNo">1613</span> }<a name="line.1613"></a> |
| <span class="sourceLineNo">1614</span><a name="line.1614"></a> |
| <span class="sourceLineNo">1615</span> public boolean isRegionTwiceOverThreshold(final RegionInfo regionInfo) {<a name="line.1615"></a> |
| <span class="sourceLineNo">1616</span> Map<String, RegionState> m = this.ritsOverThreshold;<a name="line.1616"></a> |
| <span class="sourceLineNo">1617</span> if (m == null) {<a name="line.1617"></a> |
| <span class="sourceLineNo">1618</span> return false;<a name="line.1618"></a> |
| <span class="sourceLineNo">1619</span> }<a name="line.1619"></a> |
| <span class="sourceLineNo">1620</span> final RegionState state = m.get(regionInfo.getEncodedName());<a name="line.1620"></a> |
| <span class="sourceLineNo">1621</span> if (state == null) {<a name="line.1621"></a> |
| <span class="sourceLineNo">1622</span> return false;<a name="line.1622"></a> |
| <span class="sourceLineNo">1623</span> }<a name="line.1623"></a> |
| <span class="sourceLineNo">1624</span> return (statTimestamp - state.getStamp()) > (ritThreshold * 2);<a name="line.1624"></a> |
| <span class="sourceLineNo">1625</span> }<a name="line.1625"></a> |
| <span class="sourceLineNo">1626</span><a name="line.1626"></a> |
| <span class="sourceLineNo">1627</span> protected void update(final AssignmentManager am) {<a name="line.1627"></a> |
| <span class="sourceLineNo">1628</span> final RegionStates regionStates = am.getRegionStates();<a name="line.1628"></a> |
| <span class="sourceLineNo">1629</span> this.statTimestamp = EnvironmentEdgeManager.currentTime();<a name="line.1629"></a> |
| <span class="sourceLineNo">1630</span> update(regionStates.getRegionsStateInTransition(), statTimestamp);<a name="line.1630"></a> |
| <span class="sourceLineNo">1631</span> update(regionStates.getRegionFailedOpen(), statTimestamp);<a name="line.1631"></a> |
| <span class="sourceLineNo">1632</span><a name="line.1632"></a> |
| <span class="sourceLineNo">1633</span> if (LOG.isDebugEnabled() && ritsOverThreshold != null && !ritsOverThreshold.isEmpty()) {<a name="line.1633"></a> |
| <span class="sourceLineNo">1634</span> LOG.debug("RITs over threshold: {}",<a name="line.1634"></a> |
| <span class="sourceLineNo">1635</span> ritsOverThreshold.entrySet().stream()<a name="line.1635"></a> |
| <span class="sourceLineNo">1636</span> .map(e -> e.getKey() + ":" + e.getValue().getState().name())<a name="line.1636"></a> |
| <span class="sourceLineNo">1637</span> .collect(Collectors.joining("\n")));<a name="line.1637"></a> |
| <span class="sourceLineNo">1638</span> }<a name="line.1638"></a> |
| <span class="sourceLineNo">1639</span> }<a name="line.1639"></a> |
| <span class="sourceLineNo">1640</span><a name="line.1640"></a> |
| <span class="sourceLineNo">1641</span> private void update(final Collection<RegionState> regions, final long currentTime) {<a name="line.1641"></a> |
| <span class="sourceLineNo">1642</span> for (RegionState state : regions) {<a name="line.1642"></a> |
| <span class="sourceLineNo">1643</span> totalRITs++;<a name="line.1643"></a> |
| <span class="sourceLineNo">1644</span> final long ritStartedMs = state.getStamp();<a name="line.1644"></a> |
| <span class="sourceLineNo">1645</span> if (ritStartedMs == 0) {<a name="line.1645"></a> |
| <span class="sourceLineNo">1646</span> // Don't output bogus values to metrics if they accidentally make it here.<a name="line.1646"></a> |
| <span class="sourceLineNo">1647</span> LOG.warn("The RIT {} has no start time", state.getRegion());<a name="line.1647"></a> |
| <span class="sourceLineNo">1648</span> continue;<a name="line.1648"></a> |
| <span class="sourceLineNo">1649</span> }<a name="line.1649"></a> |
| <span class="sourceLineNo">1650</span> final long ritTime = currentTime - ritStartedMs;<a name="line.1650"></a> |
| <span class="sourceLineNo">1651</span> if (ritTime > ritThreshold) {<a name="line.1651"></a> |
| <span class="sourceLineNo">1652</span> if (ritsOverThreshold == null) {<a name="line.1652"></a> |
| <span class="sourceLineNo">1653</span> ritsOverThreshold = new HashMap<String, RegionState>();<a name="line.1653"></a> |
| <span class="sourceLineNo">1654</span> }<a name="line.1654"></a> |
| <span class="sourceLineNo">1655</span> ritsOverThreshold.put(state.getRegion().getEncodedName(), state);<a name="line.1655"></a> |
| <span class="sourceLineNo">1656</span> totalRITsTwiceThreshold += (ritTime > (ritThreshold * 2)) ? 1 : 0;<a name="line.1656"></a> |
| <span class="sourceLineNo">1657</span> }<a name="line.1657"></a> |
| <span class="sourceLineNo">1658</span> if (oldestRITTime < ritTime) {<a name="line.1658"></a> |
| <span class="sourceLineNo">1659</span> oldestRITTime = ritTime;<a name="line.1659"></a> |
| <span class="sourceLineNo">1660</span> }<a name="line.1660"></a> |
| <span class="sourceLineNo">1661</span> }<a name="line.1661"></a> |
| <span class="sourceLineNo">1662</span> }<a name="line.1662"></a> |
| <span class="sourceLineNo">1663</span> }<a name="line.1663"></a> |
| <span class="sourceLineNo">1664</span><a name="line.1664"></a> |
| <span class="sourceLineNo">1665</span> private void updateRegionsInTransitionMetrics(final RegionInTransitionStat ritStat) {<a name="line.1665"></a> |
| <span class="sourceLineNo">1666</span> metrics.updateRITOldestAge(ritStat.getOldestRITTime());<a name="line.1666"></a> |
| <span class="sourceLineNo">1667</span> metrics.updateRITCount(ritStat.getTotalRITs());<a name="line.1667"></a> |
| <span class="sourceLineNo">1668</span> metrics.updateRITCountOverThreshold(ritStat.getTotalRITsOverThreshold());<a name="line.1668"></a> |
| <span class="sourceLineNo">1669</span> }<a name="line.1669"></a> |
| <span class="sourceLineNo">1670</span><a name="line.1670"></a> |
| <span class="sourceLineNo">1671</span> private void updateDeadServerRegionMetrics(int deadRegions, int unknownRegions) {<a name="line.1671"></a> |
| <span class="sourceLineNo">1672</span> metrics.updateDeadServerOpenRegions(deadRegions);<a name="line.1672"></a> |
| <span class="sourceLineNo">1673</span> metrics.updateUnknownServerOpenRegions(unknownRegions);<a name="line.1673"></a> |
| <span class="sourceLineNo">1674</span> }<a name="line.1674"></a> |
| <span class="sourceLineNo">1675</span><a name="line.1675"></a> |
| <span class="sourceLineNo">1676</span> private void handleRegionOverStuckWarningThreshold(final RegionInfo regionInfo) {<a name="line.1676"></a> |
| <span class="sourceLineNo">1677</span> final RegionStateNode regionNode = regionStates.getRegionStateNode(regionInfo);<a name="line.1677"></a> |
| <span class="sourceLineNo">1678</span> // if (regionNode.isStuck()) {<a name="line.1678"></a> |
| <span class="sourceLineNo">1679</span> LOG.warn("STUCK Region-In-Transition {}", regionNode);<a name="line.1679"></a> |
| <span class="sourceLineNo">1680</span> }<a name="line.1680"></a> |
| <span class="sourceLineNo">1681</span><a name="line.1681"></a> |
| <span class="sourceLineNo">1682</span> // ============================================================================================<a name="line.1682"></a> |
| <span class="sourceLineNo">1683</span> // TODO: Master load/bootstrap<a name="line.1683"></a> |
| <span class="sourceLineNo">1684</span> // ============================================================================================<a name="line.1684"></a> |
| <span class="sourceLineNo">1685</span> public void joinCluster() throws IOException {<a name="line.1685"></a> |
| <span class="sourceLineNo">1686</span> long startTime = System.nanoTime();<a name="line.1686"></a> |
| <span class="sourceLineNo">1687</span> LOG.debug("Joining cluster...");<a name="line.1687"></a> |
| <span class="sourceLineNo">1688</span><a name="line.1688"></a> |
| <span class="sourceLineNo">1689</span> // Scan hbase:meta to build list of existing regions, servers, and assignment.<a name="line.1689"></a> |
| <span class="sourceLineNo">1690</span> // hbase:meta is online now or will be. Inside loadMeta, we keep trying. Can't make progress<a name="line.1690"></a> |
| <span class="sourceLineNo">1691</span> // w/o meta.<a name="line.1691"></a> |
| <span class="sourceLineNo">1692</span> loadMeta();<a name="line.1692"></a> |
| <span class="sourceLineNo">1693</span><a name="line.1693"></a> |
| <span class="sourceLineNo">1694</span> while (master.getServerManager().countOfRegionServers() < 1) {<a name="line.1694"></a> |
| <span class="sourceLineNo">1695</span> LOG.info("Waiting for RegionServers to join; current count={}",<a name="line.1695"></a> |
| <span class="sourceLineNo">1696</span> master.getServerManager().countOfRegionServers());<a name="line.1696"></a> |
| <span class="sourceLineNo">1697</span> Threads.sleep(250);<a name="line.1697"></a> |
| <span class="sourceLineNo">1698</span> }<a name="line.1698"></a> |
| <span class="sourceLineNo">1699</span> LOG.info("Number of RegionServers={}", master.getServerManager().countOfRegionServers());<a name="line.1699"></a> |
| <span class="sourceLineNo">1700</span><a name="line.1700"></a> |
| <span class="sourceLineNo">1701</span> // Start the chores<a name="line.1701"></a> |
| <span class="sourceLineNo">1702</span> master.getMasterProcedureExecutor().addChore(this.ritChore);<a name="line.1702"></a> |
| <span class="sourceLineNo">1703</span> master.getMasterProcedureExecutor().addChore(this.deadMetricChore);<a name="line.1703"></a> |
| <span class="sourceLineNo">1704</span><a name="line.1704"></a> |
| <span class="sourceLineNo">1705</span> long costMs = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTime);<a name="line.1705"></a> |
| <span class="sourceLineNo">1706</span> LOG.info("Joined the cluster in {}", StringUtils.humanTimeDiff(costMs));<a name="line.1706"></a> |
| <span class="sourceLineNo">1707</span> }<a name="line.1707"></a> |
| <span class="sourceLineNo">1708</span><a name="line.1708"></a> |
| <span class="sourceLineNo">1709</span> /**<a name="line.1709"></a> |
| <span class="sourceLineNo">1710</span> * Create assign procedure for offline regions. Just follow the old<a name="line.1710"></a> |
| <span class="sourceLineNo">1711</span> * processofflineServersWithOnlineRegions method. Since now we do not need to deal with dead<a name="line.1711"></a> |
| <span class="sourceLineNo">1712</span> * servers any more, we only deal with the regions in OFFLINE state in this method. And this is a<a name="line.1712"></a> |
| <span class="sourceLineNo">1713</span> * bit strange, that for new regions, we will add them in CLOSED state instead of OFFLINE state, and<a name="line.1713"></a> |
| <span class="sourceLineNo">1714</span> * usually there will be a procedure to track them. The processofflineServersWithOnlineRegions is<a name="line.1714"></a> |
| <span class="sourceLineNo">1715</span> * a legacy from long ago, and as things are really different now, maybe we do not need this<a name="line.1715"></a> |
| <span class="sourceLineNo">1716</span> * method any more. Need to revisit later.<a name="line.1716"></a> |
| <span class="sourceLineNo">1717</span> */<a name="line.1717"></a> |
| <span class="sourceLineNo">1718</span> // Public so can be run by the Master as part of the startup. Needs hbase:meta to be online.<a name="line.1718"></a> |
| <span class="sourceLineNo">1719</span> // Needs to be done after the table state manager has been started.<a name="line.1719"></a> |
| <span class="sourceLineNo">1720</span> public void processOfflineRegions() {<a name="line.1720"></a> |
| <span class="sourceLineNo">1721</span> TransitRegionStateProcedure[] procs =<a name="line.1721"></a> |
| <span class="sourceLineNo">1722</span> regionStates.getRegionStateNodes().stream().filter(rsn -> rsn.isInState(State.OFFLINE))<a name="line.1722"></a> |
| <span class="sourceLineNo">1723</span> .filter(rsn -> isTableEnabled(rsn.getRegionInfo().getTable())).map(rsn -> {<a name="line.1723"></a> |
| <span class="sourceLineNo">1724</span> rsn.lock();<a name="line.1724"></a> |
| <span class="sourceLineNo">1725</span> try {<a name="line.1725"></a> |
| <span class="sourceLineNo">1726</span> if (rsn.getProcedure() != null) {<a name="line.1726"></a> |
| <span class="sourceLineNo">1727</span> return null;<a name="line.1727"></a> |
| <span class="sourceLineNo">1728</span> } else {<a name="line.1728"></a> |
| <span class="sourceLineNo">1729</span> return rsn.setProcedure(TransitRegionStateProcedure.assign(getProcedureEnvironment(),<a name="line.1729"></a> |
| <span class="sourceLineNo">1730</span> rsn.getRegionInfo(), null));<a name="line.1730"></a> |
| <span class="sourceLineNo">1731</span> }<a name="line.1731"></a> |
| <span class="sourceLineNo">1732</span> } finally {<a name="line.1732"></a> |
| <span class="sourceLineNo">1733</span> rsn.unlock();<a name="line.1733"></a> |
| <span class="sourceLineNo">1734</span> }<a name="line.1734"></a> |
| <span class="sourceLineNo">1735</span> }).filter(p -> p != null).toArray(TransitRegionStateProcedure[]::new);<a name="line.1735"></a> |
| <span class="sourceLineNo">1736</span> if (procs.length > 0) {<a name="line.1736"></a> |
| <span class="sourceLineNo">1737</span> master.getMasterProcedureExecutor().submitProcedures(procs);<a name="line.1737"></a> |
| <span class="sourceLineNo">1738</span> }<a name="line.1738"></a> |
| <span class="sourceLineNo">1739</span> }<a name="line.1739"></a> |
| <span class="sourceLineNo">1740</span><a name="line.1740"></a> |
| <span class="sourceLineNo">1741</span> /*<a name="line.1741"></a> |
| <span class="sourceLineNo">1742</span> * AM internal RegionStateStore.RegionStateVisitor implementation. To be used when scanning META<a name="line.1742"></a> |
| <span class="sourceLineNo">1743</span> * table for region rows, using RegionStateStore utility methods. RegionStateStore methods will<a name="line.1743"></a> |
| <span class="sourceLineNo">1744</span> * convert Result into proper RegionInfo instances, but those would still need to be added into<a name="line.1744"></a> |
| <span class="sourceLineNo">1745</span> * AssignmentManager.regionStates in-memory cache. RegionMetaLoadingVisitor.visitRegionState<a name="line.1745"></a> |
| <span class="sourceLineNo">1746</span> * method provides the logic for adding RegionInfo instances as loaded from latest META scan into<a name="line.1746"></a> |
| <span class="sourceLineNo">1747</span> * AssignmentManager.regionStates.<a name="line.1747"></a> |
| <span class="sourceLineNo">1748</span> */<a name="line.1748"></a> |
| <span class="sourceLineNo">1749</span> private class RegionMetaLoadingVisitor implements RegionStateStore.RegionStateVisitor {<a name="line.1749"></a> |
| <span class="sourceLineNo">1750</span><a name="line.1750"></a> |
| <span class="sourceLineNo">1751</span> @Override<a name="line.1751"></a> |
| <span class="sourceLineNo">1752</span> public void visitRegionState(Result result, final RegionInfo regionInfo, final State state,<a name="line.1752"></a> |
| <span class="sourceLineNo">1753</span> final ServerName regionLocation, final ServerName lastHost, final long openSeqNum) {<a name="line.1753"></a> |
| <span class="sourceLineNo">1754</span> if (<a name="line.1754"></a> |
| <span class="sourceLineNo">1755</span> state == null && regionLocation == null && lastHost == null<a name="line.1755"></a> |
| <span class="sourceLineNo">1756</span> && openSeqNum == SequenceId.NO_SEQUENCE_ID<a name="line.1756"></a> |
| <span class="sourceLineNo">1757</span> ) {<a name="line.1757"></a> |
| <span class="sourceLineNo">1758</span> // This is a row with nothing in it.<a name="line.1758"></a> |
| <span class="sourceLineNo">1759</span> LOG.warn("Skipping empty row={}", result);<a name="line.1759"></a> |
| <span class="sourceLineNo">1760</span> return;<a name="line.1760"></a> |
| <span class="sourceLineNo">1761</span> }<a name="line.1761"></a> |
| <span class="sourceLineNo">1762</span> State localState = state;<a name="line.1762"></a> |
| <span class="sourceLineNo">1763</span> if (localState == null) {<a name="line.1763"></a> |
| <span class="sourceLineNo">1764</span> // No region state column data in hbase:meta table! Am I doing a rolling upgrade from<a name="line.1764"></a> |
| <span class="sourceLineNo">1765</span> // hbase1 to hbase2? Am I restoring a SNAPSHOT or otherwise adding a region to hbase:meta?<a name="line.1765"></a> |
| <span class="sourceLineNo">1766</span> // In any of these cases, state is empty. For now, presume OFFLINE but there are probably<a name="line.1766"></a> |
| <span class="sourceLineNo">1767</span> // cases where we need to probe more to be sure this is correct; TODO informed by experience.<a name="line.1767"></a> |
| <span class="sourceLineNo">1768</span> LOG.info(regionInfo.getEncodedName() + " regionState=null; presuming " + State.OFFLINE);<a name="line.1768"></a> |
| <span class="sourceLineNo">1769</span> localState = State.OFFLINE;<a name="line.1769"></a> |
| <span class="sourceLineNo">1770</span> }<a name="line.1770"></a> |
| <span class="sourceLineNo">1771</span> RegionStateNode regionNode = regionStates.getOrCreateRegionStateNode(regionInfo);<a name="line.1771"></a> |
| <span class="sourceLineNo">1772</span> // Do not need to lock on regionNode, as we can make sure that before we finish loading<a name="line.1772"></a> |
| <span class="sourceLineNo">1773</span> // meta, all the related procedures can not be executed. The only exception is for meta<a name="line.1773"></a> |
| <span class="sourceLineNo">1774</span> // region related operations, but here we do not load the information for the meta region.<a name="line.1774"></a> |
| <span class="sourceLineNo">1775</span> regionNode.setState(localState);<a name="line.1775"></a> |
| <span class="sourceLineNo">1776</span> regionNode.setLastHost(lastHost);<a name="line.1776"></a> |
| <span class="sourceLineNo">1777</span> regionNode.setRegionLocation(regionLocation);<a name="line.1777"></a> |
| <span class="sourceLineNo">1778</span> regionNode.setOpenSeqNum(openSeqNum);<a name="line.1778"></a> |
| <span class="sourceLineNo">1779</span><a name="line.1779"></a> |
| <span class="sourceLineNo">1780</span> // Note: keep consistent with other methods, see region(Opening|Opened|Closing)<a name="line.1780"></a> |
| <span class="sourceLineNo">1781</span> // RIT/ServerCrash handling should take care of the transiting regions.<a name="line.1781"></a> |
| <span class="sourceLineNo">1782</span> if (<a name="line.1782"></a> |
| <span class="sourceLineNo">1783</span> localState.matches(State.OPEN, State.OPENING, State.CLOSING, State.SPLITTING, State.MERGING)<a name="line.1783"></a> |
| <span class="sourceLineNo">1784</span> ) {<a name="line.1784"></a> |
| <span class="sourceLineNo">1785</span> assert regionLocation != null : "found null region location for " + regionNode;<a name="line.1785"></a> |
| <span class="sourceLineNo">1786</span> // TODO: this could lead to some orphan server state nodes, as it is possible that the<a name="line.1786"></a> |
| <span class="sourceLineNo">1787</span> // region server is already dead and its SCP has already finished but we have<a name="line.1787"></a> |
| <span class="sourceLineNo">1788</span> // persisted an opening state on this region server. Finally the TRSP will assign the<a name="line.1788"></a> |
| <span class="sourceLineNo">1789</span> // region to another region server, so it will not cause critical problems, just waste<a name="line.1789"></a> |
| <span class="sourceLineNo">1790</span> // some memory as no one will try to clean up these orphan server state nodes.<a name="line.1790"></a> |
| <span class="sourceLineNo">1791</span> regionStates.createServer(regionLocation);<a name="line.1791"></a> |
| <span class="sourceLineNo">1792</span> regionStates.addRegionToServer(regionNode);<a name="line.1792"></a> |
| <span class="sourceLineNo">1793</span> } else if (localState == State.OFFLINE || regionInfo.isOffline()) {<a name="line.1793"></a> |
| <span class="sourceLineNo">1794</span> regionStates.addToOfflineRegions(regionNode);<a name="line.1794"></a> |
| <span class="sourceLineNo">1795</span> }<a name="line.1795"></a> |
| <span class="sourceLineNo">1796</span> if (regionNode.getProcedure() != null) {<a name="line.1796"></a> |
| <span class="sourceLineNo">1797</span> regionNode.getProcedure().stateLoaded(AssignmentManager.this, regionNode);<a name="line.1797"></a> |
| <span class="sourceLineNo">1798</span> }<a name="line.1798"></a> |
| <span class="sourceLineNo">1799</span> }<a name="line.1799"></a> |
| <span class="sourceLineNo">1800</span> };<a name="line.1800"></a> |
| <span class="sourceLineNo">1801</span><a name="line.1801"></a> |
| <span class="sourceLineNo">1802</span> /**<a name="line.1802"></a> |
| <span class="sourceLineNo">1803</span> * Attempt to load {@code regionInfo} from META, adding any results to the<a name="line.1803"></a> |
| <span class="sourceLineNo">1804</span> * {@link #regionStateStore}. Is NOT aware of replica regions.<a name="line.1804"></a> |
| <span class="sourceLineNo">1805</span> * @param regionInfo the region to be loaded from META.<a name="line.1805"></a> |
| <span class="sourceLineNo">1806</span> * @throws IOException If some error occurs while querying META or parsing results.<a name="line.1806"></a> |
| <span class="sourceLineNo">1807</span> */<a name="line.1807"></a> |
| <span class="sourceLineNo">1808</span> public void populateRegionStatesFromMeta(@NonNull final RegionInfo regionInfo)<a name="line.1808"></a> |
| <span class="sourceLineNo">1809</span> throws IOException {<a name="line.1809"></a> |
| <span class="sourceLineNo">1810</span> final String regionEncodedName = RegionInfo.DEFAULT_REPLICA_ID == regionInfo.getReplicaId()<a name="line.1810"></a> |
| <span class="sourceLineNo">1811</span> ? regionInfo.getEncodedName()<a name="line.1811"></a> |
| <span class="sourceLineNo">1812</span> : RegionInfoBuilder.newBuilder(regionInfo).setReplicaId(RegionInfo.DEFAULT_REPLICA_ID).build()<a name="line.1812"></a> |
| <span class="sourceLineNo">1813</span> .getEncodedName();<a name="line.1813"></a> |
| <span class="sourceLineNo">1814</span> populateRegionStatesFromMeta(regionEncodedName);<a name="line.1814"></a> |
| <span class="sourceLineNo">1815</span> }<a name="line.1815"></a> |
| <span class="sourceLineNo">1816</span><a name="line.1816"></a> |
| <span class="sourceLineNo">1817</span> /**<a name="line.1817"></a> |
| <span class="sourceLineNo">1818</span> * Attempt to load {@code regionEncodedName} from META, adding any results to the<a name="line.1818"></a> |
| <span class="sourceLineNo">1819</span> * {@link #regionStateStore}. Is NOT aware of replica regions.<a name="line.1819"></a> |
| <span class="sourceLineNo">1820</span> * @param regionEncodedName encoded name for the region to be loaded from META.<a name="line.1820"></a> |
| <span class="sourceLineNo">1821</span> * @throws IOException If some error occurs while querying META or parsing results.<a name="line.1821"></a> |
| <span class="sourceLineNo">1822</span> */<a name="line.1822"></a> |
| <span class="sourceLineNo">1823</span> public void populateRegionStatesFromMeta(@NonNull String regionEncodedName) throws IOException {<a name="line.1823"></a> |
| <span class="sourceLineNo">1824</span> final RegionMetaLoadingVisitor visitor = new RegionMetaLoadingVisitor();<a name="line.1824"></a> |
| <span class="sourceLineNo">1825</span> regionStateStore.visitMetaForRegion(regionEncodedName, visitor);<a name="line.1825"></a> |
| <span class="sourceLineNo">1826</span> }<a name="line.1826"></a> |
| <span class="sourceLineNo">1827</span><a name="line.1827"></a> |
| <span class="sourceLineNo">1828</span> private void loadMeta() throws IOException {<a name="line.1828"></a> |
| <span class="sourceLineNo">1829</span> // TODO: use a thread pool<a name="line.1829"></a> |
| <span class="sourceLineNo">1830</span> regionStateStore.visitMeta(new RegionMetaLoadingVisitor());<a name="line.1830"></a> |
| <span class="sourceLineNo">1831</span> }<a name="line.1831"></a> |
| <span class="sourceLineNo">1832</span><a name="line.1832"></a> |
| <span class="sourceLineNo">1833</span> /**<a name="line.1833"></a> |
| <span class="sourceLineNo">1834</span> * Used to check if the meta loading is done.<a name="line.1834"></a> |
| <span class="sourceLineNo">1835</span> * <p/><a name="line.1835"></a> |
| <span class="sourceLineNo">1836</span> * If not, we throw PleaseHoldException since we are rebuilding the RegionStates.<a name="line.1836"></a> |
| <span class="sourceLineNo">1837</span> * @param hri region to check if it is already rebuilt<a name="line.1837"></a> |
| <span class="sourceLineNo">1838</span> * @throws PleaseHoldException if meta has not been loaded yet<a name="line.1838"></a> |
| <span class="sourceLineNo">1839</span> */<a name="line.1839"></a> |
| <span class="sourceLineNo">1840</span> private void checkMetaLoaded(RegionInfo hri) throws PleaseHoldException {<a name="line.1840"></a> |
| <span class="sourceLineNo">1841</span> if (!isRunning()) {<a name="line.1841"></a> |
| <span class="sourceLineNo">1842</span> throw new PleaseHoldException("AssignmentManager not running");<a name="line.1842"></a> |
| <span class="sourceLineNo">1843</span> }<a name="line.1843"></a> |
| <span class="sourceLineNo">1844</span> boolean meta = isMetaRegion(hri);<a name="line.1844"></a> |
| <span class="sourceLineNo">1845</span> boolean metaLoaded = isMetaLoaded();<a name="line.1845"></a> |
| <span class="sourceLineNo">1846</span> if (!meta && !metaLoaded) {<a name="line.1846"></a> |
| <span class="sourceLineNo">1847</span> throw new PleaseHoldException(<a name="line.1847"></a> |
| <span class="sourceLineNo">1848</span> "Master not fully online; hbase:meta=" + meta + ", metaLoaded=" + metaLoaded);<a name="line.1848"></a> |
| <span class="sourceLineNo">1849</span> }<a name="line.1849"></a> |
| <span class="sourceLineNo">1850</span> }<a name="line.1850"></a> |
| <span class="sourceLineNo">1851</span><a name="line.1851"></a> |
| <span class="sourceLineNo">1852</span> // ============================================================================================<a name="line.1852"></a> |
| <span class="sourceLineNo">1853</span> // TODO: Metrics<a name="line.1853"></a> |
| <span class="sourceLineNo">1854</span> // ============================================================================================<a name="line.1854"></a> |
| <span class="sourceLineNo">1855</span> public int getNumRegionsOpened() {<a name="line.1855"></a> |
| <span class="sourceLineNo">1856</span> // TODO: Used by TestRegionPlacement.java and assume monotonically increasing value<a name="line.1856"></a> |
| <span class="sourceLineNo">1857</span> return 0;<a name="line.1857"></a> |
| <span class="sourceLineNo">1858</span> }<a name="line.1858"></a> |
| <span class="sourceLineNo">1859</span><a name="line.1859"></a> |
| <span class="sourceLineNo">1860</span> /**<a name="line.1860"></a> |
| <span class="sourceLineNo">1861</span> * Usually run by the Master in reaction to server crash during normal processing. Can also be<a name="line.1861"></a> |
| <span class="sourceLineNo">1862</span> * invoked via external RPC to effect repair; in the latter case, the 'force' flag is set so we<a name="line.1862"></a> |
| <span class="sourceLineNo">1863</span> * push through the SCP though context may indicate already-running-SCP (An old SCP may have<a name="line.1863"></a> |
| <span class="sourceLineNo">1864</span> * exited abnormally, or damaged cluster may still have references in hbase:meta to 'Unknown<a name="line.1864"></a> |
| <span class="sourceLineNo">1865</span> * Servers' -- servers that are not online or in dead servers list, etc.)<a name="line.1865"></a> |
| <span class="sourceLineNo">1866</span> * @param force Set if the request came in externally over RPC (via hbck2). Force means run the<a name="line.1866"></a> |
| <span class="sourceLineNo">1867</span> * SCP even if it seems as though there might be an outstanding SCP running.<a name="line.1867"></a> |
| <span class="sourceLineNo">1868</span> * @return pid of scheduled SCP or {@link Procedure#NO_PROC_ID} if none scheduled.<a name="line.1868"></a> |
| <span class="sourceLineNo">1869</span> */<a name="line.1869"></a> |
| <span class="sourceLineNo">1870</span> public long submitServerCrash(ServerName serverName, boolean shouldSplitWal, boolean force) {<a name="line.1870"></a> |
| <span class="sourceLineNo">1871</span> // May be an 'Unknown Server' so handle case where serverNode is null.<a name="line.1871"></a> |
| <span class="sourceLineNo">1872</span> ServerStateNode serverNode = regionStates.getServerNode(serverName);<a name="line.1872"></a> |
| <span class="sourceLineNo">1873</span> // Remove the in-memory rsReports result<a name="line.1873"></a> |
| <span class="sourceLineNo">1874</span> synchronized (rsReports) {<a name="line.1874"></a> |
| <span class="sourceLineNo">1875</span> rsReports.remove(serverName);<a name="line.1875"></a> |
| <span class="sourceLineNo">1876</span> }<a name="line.1876"></a> |
| <span class="sourceLineNo">1877</span> if (serverNode == null) {<a name="line.1877"></a> |
| <span class="sourceLineNo">1878</span> if (force) {<a name="line.1878"></a> |
| <span class="sourceLineNo">1879</span> LOG.info("Force adding ServerCrashProcedure for {} when server node is null", serverName);<a name="line.1879"></a> |
| <span class="sourceLineNo">1880</span> } else {<a name="line.1880"></a> |
| <span class="sourceLineNo">1881</span> // for normal case, do not schedule SCP if ServerStateNode is null<a name="line.1881"></a> |
| <span class="sourceLineNo">1882</span> LOG.warn("Skip adding ServerCrashProcedure for {} because server node is null", serverName);<a name="line.1882"></a> |
| <span class="sourceLineNo">1883</span> return Procedure.NO_PROC_ID;<a name="line.1883"></a> |
| <span class="sourceLineNo">1884</span> }<a name="line.1884"></a> |
| <span class="sourceLineNo">1885</span> }<a name="line.1885"></a> |
| <span class="sourceLineNo">1886</span><a name="line.1886"></a> |
| <span class="sourceLineNo">1887</span> ProcedureExecutor<MasterProcedureEnv> procExec = this.master.getMasterProcedureExecutor();<a name="line.1887"></a> |
| <span class="sourceLineNo">1888</span> // We hold the write lock here for fencing on reportRegionStateTransition. Once we set the<a name="line.1888"></a> |
| <span class="sourceLineNo">1889</span> // server state to CRASHED, we will no longer accept the reportRegionStateTransition call from<a name="line.1889"></a> |
| <span class="sourceLineNo">1890</span> // this server. This is used to simplify the implementation for TRSP and SCP, where we can make<a name="line.1890"></a> |
| <span class="sourceLineNo">1891</span> // sure that, the region list fetched by SCP will not be changed any more.<a name="line.1891"></a> |
| <span class="sourceLineNo">1892</span> if (serverNode != null) {<a name="line.1892"></a> |
| <span class="sourceLineNo">1893</span> serverNode.writeLock().lock();<a name="line.1893"></a> |
| <span class="sourceLineNo">1894</span> }<a name="line.1894"></a> |
| <span class="sourceLineNo">1895</span> try {<a name="line.1895"></a> |
| <span class="sourceLineNo">1896</span><a name="line.1896"></a> |
| <span class="sourceLineNo">1897</span> boolean carryingMeta = isCarryingMeta(serverName);<a name="line.1897"></a> |
| <span class="sourceLineNo">1898</span> if (serverNode != null && !serverNode.isInState(ServerState.ONLINE)) {<a name="line.1898"></a> |
| <span class="sourceLineNo">1899</span> if (force) {<a name="line.1899"></a> |
| <span class="sourceLineNo">1900</span> LOG.info("Force adding ServerCrashProcedure for {} (meta={}) when state is not {}",<a name="line.1900"></a> |
| <span class="sourceLineNo">1901</span> serverNode, carryingMeta, ServerState.ONLINE);<a name="line.1901"></a> |
| <span class="sourceLineNo">1902</span> } else {<a name="line.1902"></a> |
| <span class="sourceLineNo">1903</span> LOG.info("Skip adding ServerCrashProcedure for {} (meta={}) when state is not {}",<a name="line.1903"></a> |
| <span class="sourceLineNo">1904</span> serverNode, carryingMeta, ServerState.ONLINE);<a name="line.1904"></a> |
| <span class="sourceLineNo">1905</span> return Procedure.NO_PROC_ID;<a name="line.1905"></a> |
| <span class="sourceLineNo">1906</span> }<a name="line.1906"></a> |
| <span class="sourceLineNo">1907</span> }<a name="line.1907"></a> |
| <span class="sourceLineNo">1908</span> MasterProcedureEnv mpe = procExec.getEnvironment();<a name="line.1908"></a> |
| <span class="sourceLineNo">1909</span> // If serverNode == null, then 'Unknown Server'. Schedule HBCKSCP instead.<a name="line.1909"></a> |
| <span class="sourceLineNo">1910</span> // HBCKSCP scours Master in-memory state AND hbase:meta for references to<a name="line.1910"></a> |
| <span class="sourceLineNo">1911</span> // serverName just-in-case. An SCP that is scheduled when the server is<a name="line.1911"></a> |
| <span class="sourceLineNo">1912</span> // 'Unknown' probably originated externally with HBCK2 fix-it tool.<a name="line.1912"></a> |
| <span class="sourceLineNo">1913</span> ServerState oldState = null;<a name="line.1913"></a> |
| <span class="sourceLineNo">1914</span> if (serverNode != null) {<a name="line.1914"></a> |
| <span class="sourceLineNo">1915</span> oldState = serverNode.getState();<a name="line.1915"></a> |
| <span class="sourceLineNo">1916</span> serverNode.setState(ServerState.CRASHED);<a name="line.1916"></a> |
| <span class="sourceLineNo">1917</span> }<a name="line.1917"></a> |
| <span class="sourceLineNo">1918</span> ServerCrashProcedure scp = force<a name="line.1918"></a> |
| <span class="sourceLineNo">1919</span> ? new HBCKServerCrashProcedure(mpe, serverName, shouldSplitWal, carryingMeta)<a name="line.1919"></a> |
| <span class="sourceLineNo">1920</span> : new ServerCrashProcedure(mpe, serverName, shouldSplitWal, carryingMeta);<a name="line.1920"></a> |
| <span class="sourceLineNo">1921</span> long pid = procExec.submitProcedure(scp);<a name="line.1921"></a> |
| <span class="sourceLineNo">1922</span> LOG.info("Scheduled ServerCrashProcedure pid={} for {} (carryingMeta={}){}.", pid, serverName,<a name="line.1922"></a> |
| <span class="sourceLineNo">1923</span> carryingMeta,<a name="line.1923"></a> |
| <span class="sourceLineNo">1924</span> serverNode == null ? "" : " " + serverNode.toString() + ", oldState=" + oldState);<a name="line.1924"></a> |
| <span class="sourceLineNo">1925</span> return pid;<a name="line.1925"></a> |
| <span class="sourceLineNo">1926</span> } finally {<a name="line.1926"></a> |
| <span class="sourceLineNo">1927</span> if (serverNode != null) {<a name="line.1927"></a> |
| <span class="sourceLineNo">1928</span> serverNode.writeLock().unlock();<a name="line.1928"></a> |
| <span class="sourceLineNo">1929</span> }<a name="line.1929"></a> |
| <span class="sourceLineNo">1930</span> }<a name="line.1930"></a> |
| <span class="sourceLineNo">1931</span> }<a name="line.1931"></a> |
| <span class="sourceLineNo">1932</span><a name="line.1932"></a> |
| <span class="sourceLineNo">1933</span> public void offlineRegion(final RegionInfo regionInfo) {<a name="line.1933"></a> |
| <span class="sourceLineNo">1934</span> // TODO used by MasterRpcServices<a name="line.1934"></a> |
| <span class="sourceLineNo">1935</span> RegionStateNode node = regionStates.getRegionStateNode(regionInfo);<a name="line.1935"></a> |
| <span class="sourceLineNo">1936</span> if (node != null) {<a name="line.1936"></a> |
| <span class="sourceLineNo">1937</span> node.offline();<a name="line.1937"></a> |
| <span class="sourceLineNo">1938</span> }<a name="line.1938"></a> |
| <span class="sourceLineNo">1939</span> }<a name="line.1939"></a> |
| <span class="sourceLineNo">1940</span><a name="line.1940"></a> |
| <span class="sourceLineNo">1941</span> public void onlineRegion(final RegionInfo regionInfo, final ServerName serverName) {<a name="line.1941"></a> |
| <span class="sourceLineNo">1942</span> // TODO used by TestSplitTransactionOnCluster.java<a name="line.1942"></a> |
| <span class="sourceLineNo">1943</span> }<a name="line.1943"></a> |
| <span class="sourceLineNo">1944</span><a name="line.1944"></a> |
| <span class="sourceLineNo">1945</span> public Map<ServerName, List<RegionInfo>><a name="line.1945"></a> |
| <span class="sourceLineNo">1946</span> getSnapShotOfAssignment(final Collection<RegionInfo> regions) {<a name="line.1946"></a> |
| <span class="sourceLineNo">1947</span> return regionStates.getSnapShotOfAssignment(regions);<a name="line.1947"></a> |
| <span class="sourceLineNo">1948</span> }<a name="line.1948"></a> |
| <span class="sourceLineNo">1949</span><a name="line.1949"></a> |
| <span class="sourceLineNo">1950</span> // ============================================================================================<a name="line.1950"></a> |
| <span class="sourceLineNo">1951</span> // TODO: UTILS/HELPERS?<a name="line.1951"></a> |
| <span class="sourceLineNo">1952</span> // ============================================================================================<a name="line.1952"></a> |
| <span class="sourceLineNo">1953</span> /**<a name="line.1953"></a> |
| <span class="sourceLineNo">1954</span> * Used by the client (via master) to identify if all regions have the schema updates<a name="line.1954"></a> |
| <span class="sourceLineNo">1955</span> * @return Pair indicating the status of the alter command (pending/total)<a name="line.1955"></a> |
| <span class="sourceLineNo">1956</span> */<a name="line.1956"></a> |
| <span class="sourceLineNo">1957</span> public Pair<Integer, Integer> getReopenStatus(TableName tableName) {<a name="line.1957"></a> |
| <span class="sourceLineNo">1958</span> if (isTableDisabled(tableName)) {<a name="line.1958"></a> |
| <span class="sourceLineNo">1959</span> return new Pair<Integer, Integer>(0, 0);<a name="line.1959"></a> |
| <span class="sourceLineNo">1960</span> }<a name="line.1960"></a> |
| <span class="sourceLineNo">1961</span><a name="line.1961"></a> |
| <span class="sourceLineNo">1962</span> final List<RegionState> states = regionStates.getTableRegionStates(tableName);<a name="line.1962"></a> |
| <span class="sourceLineNo">1963</span> int ritCount = 0;<a name="line.1963"></a> |
| <span class="sourceLineNo">1964</span> for (RegionState regionState : states) {<a name="line.1964"></a> |
| <span class="sourceLineNo">1965</span> if (!regionState.isOpened() && !regionState.isSplit()) {<a name="line.1965"></a> |
| <span class="sourceLineNo">1966</span> ritCount++;<a name="line.1966"></a> |
| <span class="sourceLineNo">1967</span> }<a name="line.1967"></a> |
| <span class="sourceLineNo">1968</span> }<a name="line.1968"></a> |
| <span class="sourceLineNo">1969</span> return new Pair<Integer, Integer>(ritCount, states.size());<a name="line.1969"></a> |
| <span class="sourceLineNo">1970</span> }<a name="line.1970"></a> |
| <span class="sourceLineNo">1971</span><a name="line.1971"></a> |
| <span class="sourceLineNo">1972</span> // ============================================================================================<a name="line.1972"></a> |
| <span class="sourceLineNo">1973</span> // TODO: Region State In Transition<a name="line.1973"></a> |
| <span class="sourceLineNo">1974</span> // ============================================================================================<a name="line.1974"></a> |
| <span class="sourceLineNo">1975</span> public boolean hasRegionsInTransition() {<a name="line.1975"></a> |
| <span class="sourceLineNo">1976</span> return regionStates.hasRegionsInTransition();<a name="line.1976"></a> |
| <span class="sourceLineNo">1977</span> }<a name="line.1977"></a> |
| <span class="sourceLineNo">1978</span><a name="line.1978"></a> |
| <span class="sourceLineNo">1979</span> public List<RegionStateNode> getRegionsInTransition() {<a name="line.1979"></a> |
| <span class="sourceLineNo">1980</span> return regionStates.getRegionsInTransition();<a name="line.1980"></a> |
| <span class="sourceLineNo">1981</span> }<a name="line.1981"></a> |
| <span class="sourceLineNo">1982</span><a name="line.1982"></a> |
| <span class="sourceLineNo">1983</span> public List<RegionInfo> getAssignedRegions() {<a name="line.1983"></a> |
| <span class="sourceLineNo">1984</span> return regionStates.getAssignedRegions();<a name="line.1984"></a> |
| <span class="sourceLineNo">1985</span> }<a name="line.1985"></a> |
| <span class="sourceLineNo">1986</span><a name="line.1986"></a> |
| <span class="sourceLineNo">1987</span> /**<a name="line.1987"></a> |
| <span class="sourceLineNo">1988</span> * Resolve a cached {@link RegionInfo} from the region name as a {@code byte[]}.<a name="line.1988"></a> |
| <span class="sourceLineNo">1989</span> */<a name="line.1989"></a> |
| <span class="sourceLineNo">1990</span> public RegionInfo getRegionInfo(final byte[] regionName) {<a name="line.1990"></a> |
| <span class="sourceLineNo">1991</span> final RegionStateNode regionState = regionStates.getRegionStateNodeFromName(regionName);<a name="line.1991"></a> |
| <span class="sourceLineNo">1992</span> return regionState != null ? regionState.getRegionInfo() : null;<a name="line.1992"></a> |
| <span class="sourceLineNo">1993</span> }<a name="line.1993"></a> |
| <span class="sourceLineNo">1994</span><a name="line.1994"></a> |
| <span class="sourceLineNo">1995</span> /**<a name="line.1995"></a> |
| <span class="sourceLineNo">1996</span> * Resolve a cached {@link RegionInfo} from the encoded region name as a {@code String}.<a name="line.1996"></a> |
| <span class="sourceLineNo">1997</span> */<a name="line.1997"></a> |
| <span class="sourceLineNo">1998</span> public RegionInfo getRegionInfo(final String encodedRegionName) {<a name="line.1998"></a> |
| <span class="sourceLineNo">1999</span> final RegionStateNode regionState =<a name="line.1999"></a> |
| <span class="sourceLineNo">2000</span> regionStates.getRegionStateNodeFromEncodedRegionName(encodedRegionName);<a name="line.2000"></a> |
| <span class="sourceLineNo">2001</span> return regionState != null ? regionState.getRegionInfo() : null;<a name="line.2001"></a> |
| <span class="sourceLineNo">2002</span> }<a name="line.2002"></a> |
| <span class="sourceLineNo">2003</span><a name="line.2003"></a> |
| <span class="sourceLineNo">2004</span> // ============================================================================================<a name="line.2004"></a> |
| <span class="sourceLineNo">2005</span> // Expected states on region state transition.<a name="line.2005"></a> |
| <span class="sourceLineNo">2006</span> // Notice that there are no expected states for transiting to OPENING state; this is because of SCP.<a name="line.2006"></a> |
| <span class="sourceLineNo">2007</span> // See the comments in regionOpening method for more details.<a name="line.2007"></a> |
| <span class="sourceLineNo">2008</span> // ============================================================================================<a name="line.2008"></a> |
| <span class="sourceLineNo">2009</span> private static final State[] STATES_EXPECTED_ON_OPEN = { State.OPENING, // Normal case<a name="line.2009"></a> |
| <span class="sourceLineNo">2010</span> State.OPEN // Retrying<a name="line.2010"></a> |
| <span class="sourceLineNo">2011</span> };<a name="line.2011"></a> |
| <span class="sourceLineNo">2012</span><a name="line.2012"></a> |
| <span class="sourceLineNo">2013</span> private static final State[] STATES_EXPECTED_ON_CLOSING = { State.OPEN, // Normal case<a name="line.2013"></a> |
| <span class="sourceLineNo">2014</span> State.CLOSING, // Retrying<a name="line.2014"></a> |
| <span class="sourceLineNo">2015</span> State.SPLITTING, // Offline the split parent<a name="line.2015"></a> |
| <span class="sourceLineNo">2016</span> State.MERGING // Offline the merge parents<a name="line.2016"></a> |
| <span class="sourceLineNo">2017</span> };<a name="line.2017"></a> |
| <span class="sourceLineNo">2018</span><a name="line.2018"></a> |
| <span class="sourceLineNo">2019</span> private static final State[] STATES_EXPECTED_ON_CLOSED = { State.CLOSING, // Normal case<a name="line.2019"></a> |
| <span class="sourceLineNo">2020</span> State.CLOSED // Retrying<a name="line.2020"></a> |
| <span class="sourceLineNo">2021</span> };<a name="line.2021"></a> |
| <span class="sourceLineNo">2022</span><a name="line.2022"></a> |
| <span class="sourceLineNo">2023</span> // This is for manually scheduled region assign, can add other states later if we find out other<a name="line.2023"></a> |
| <span class="sourceLineNo">2024</span> // usages<a name="line.2024"></a> |
| <span class="sourceLineNo">2025</span> private static final State[] STATES_EXPECTED_ON_ASSIGN = { State.CLOSED, State.OFFLINE };<a name="line.2025"></a> |
| <span class="sourceLineNo">2026</span><a name="line.2026"></a> |
| <span class="sourceLineNo">2027</span> // We only allow unassign or move a region which is in OPEN state.<a name="line.2027"></a> |
| <span class="sourceLineNo">2028</span> private static final State[] STATES_EXPECTED_ON_UNASSIGN_OR_MOVE = { State.OPEN };<a name="line.2028"></a> |
| <span class="sourceLineNo">2029</span><a name="line.2029"></a> |
| <span class="sourceLineNo">2030</span> // ============================================================================================<a name="line.2030"></a> |
| <span class="sourceLineNo">2031</span> // Region Status update<a name="line.2031"></a> |
| <span class="sourceLineNo">2032</span> // Should only be called in TransitRegionStateProcedure(and related procedures), as the locking<a name="line.2032"></a> |
| <span class="sourceLineNo">2033</span> // and pre-assumptions are very tricky.<a name="line.2033"></a> |
| <span class="sourceLineNo">2034</span> // ============================================================================================<a name="line.2034"></a> |
| <span class="sourceLineNo">2035</span> private CompletableFuture<Void> transitStateAndUpdate(RegionStateNode regionNode,<a name="line.2035"></a> |
| <span class="sourceLineNo">2036</span> RegionState.State newState, RegionState.State... expectedStates) {<a name="line.2036"></a> |
| <span class="sourceLineNo">2037</span> RegionState.State state = regionNode.getState();<a name="line.2037"></a> |
| <span class="sourceLineNo">2038</span> try {<a name="line.2038"></a> |
| <span class="sourceLineNo">2039</span> regionNode.transitionState(newState, expectedStates);<a name="line.2039"></a> |
| <span class="sourceLineNo">2040</span> } catch (UnexpectedStateException e) {<a name="line.2040"></a> |
| <span class="sourceLineNo">2041</span> return FutureUtils.failedFuture(e);<a name="line.2041"></a> |
| <span class="sourceLineNo">2042</span> }<a name="line.2042"></a> |
| <span class="sourceLineNo">2043</span> CompletableFuture<Void> future = regionStateStore.updateRegionLocation(regionNode);<a name="line.2043"></a> |
| <span class="sourceLineNo">2044</span> FutureUtils.addListener(future, (r, e) -> {<a name="line.2044"></a> |
| <span class="sourceLineNo">2045</span> if (e != null) {<a name="line.2045"></a> |
| <span class="sourceLineNo">2046</span> // revert<a name="line.2046"></a> |
| <span class="sourceLineNo">2047</span> regionNode.setState(state);<a name="line.2047"></a> |
| <span class="sourceLineNo">2048</span> }<a name="line.2048"></a> |
| <span class="sourceLineNo">2049</span> });<a name="line.2049"></a> |
| <span class="sourceLineNo">2050</span> return future;<a name="line.2050"></a> |
| <span class="sourceLineNo">2051</span> }<a name="line.2051"></a> |
| <span class="sourceLineNo">2052</span><a name="line.2052"></a> |
| <span class="sourceLineNo">2053</span> // should be called within the synchronized block of RegionStateNode<a name="line.2053"></a> |
| <span class="sourceLineNo">2054</span> CompletableFuture<Void> regionOpening(RegionStateNode regionNode) {<a name="line.2054"></a> |
| <span class="sourceLineNo">2055</span> // As in SCP, for performance reason, there is no TRSP attached with this region, we will not<a name="line.2055"></a> |
| <span class="sourceLineNo">2056</span> // update the region state, which means that the region could be in any state when we want to<a name="line.2056"></a> |
| <span class="sourceLineNo">2057</span> // assign it after a RS crash. So here we do not pass the expectedStates parameter.<a name="line.2057"></a> |
| <span class="sourceLineNo">2058</span> return transitStateAndUpdate(regionNode, State.OPENING).thenAccept(r -> {<a name="line.2058"></a> |
| <span class="sourceLineNo">2059</span> ServerStateNode serverNode = regionStates.getServerNode(regionNode.getRegionLocation());<a name="line.2059"></a> |
| <span class="sourceLineNo">2060</span> // Here the server node could be null. For example, we want to assign the region to a given<a name="line.2060"></a> |
| <span class="sourceLineNo">2061</span> // region server and it crashes, and it is the region server which holds hbase:meta, then the<a name="line.2061"></a> |
| <span class="sourceLineNo">2062</span> // above transitStateAndUpdate call will never succeed until we finish the SCP for it. But<a name="line.2062"></a> |
| <span class="sourceLineNo">2063</span> // after the SCP finishes, the server node will be removed, so when we arrive here, the<a name="line.2063"></a> |
| <span class="sourceLineNo">2064</span> // server<a name="line.2064"></a> |
| <span class="sourceLineNo">2065</span> // node will be null. This is not a big problem if we skip adding it, as later we will fail to<a name="line.2065"></a> |
| <span class="sourceLineNo">2066</span> // execute the remote procedure on the region server and then try to assign to another region<a name="line.2066"></a> |
| <span class="sourceLineNo">2067</span> // server<a name="line.2067"></a> |
| <span class="sourceLineNo">2068</span> if (serverNode != null) {<a name="line.2068"></a> |
| <span class="sourceLineNo">2069</span> serverNode.addRegion(regionNode);<a name="line.2069"></a> |
| <span class="sourceLineNo">2070</span> }<a name="line.2070"></a> |
| <span class="sourceLineNo">2071</span> // update the operation count metrics<a name="line.2071"></a> |
| <span class="sourceLineNo">2072</span> metrics.incrementOperationCounter();<a name="line.2072"></a> |
| <span class="sourceLineNo">2073</span> });<a name="line.2073"></a> |
| <span class="sourceLineNo">2074</span> }<a name="line.2074"></a> |
| <span class="sourceLineNo">2075</span><a name="line.2075"></a> |
| <span class="sourceLineNo">2076</span> // should be called under the RegionStateNode lock<a name="line.2076"></a> |
| <span class="sourceLineNo">2077</span> // The parameter 'giveUp' means whether we give up trying to open the region; if it is true, then<a name="line.2077"></a> |
| <span class="sourceLineNo">2078</span> // we will persist the FAILED_OPEN state into hbase:meta.<a name="line.2078"></a> |
| <span class="sourceLineNo">2079</span> CompletableFuture<Void> regionFailedOpen(RegionStateNode regionNode, boolean giveUp) {<a name="line.2079"></a> |
| <span class="sourceLineNo">2080</span> RegionState.State state = regionNode.getState();<a name="line.2080"></a> |
| <span class="sourceLineNo">2081</span> ServerName regionLocation = regionNode.getRegionLocation();<a name="line.2081"></a> |
| <span class="sourceLineNo">2082</span> if (!giveUp) {<a name="line.2082"></a> |
| <span class="sourceLineNo">2083</span> if (regionLocation != null) {<a name="line.2083"></a> |
| <span class="sourceLineNo">2084</span> regionStates.removeRegionFromServer(regionLocation, regionNode);<a name="line.2084"></a> |
| <span class="sourceLineNo">2085</span> }<a name="line.2085"></a> |
| <span class="sourceLineNo">2086</span> return CompletableFuture.completedFuture(null);<a name="line.2086"></a> |
| <span class="sourceLineNo">2087</span> }<a name="line.2087"></a> |
| <span class="sourceLineNo">2088</span> regionNode.setState(State.FAILED_OPEN);<a name="line.2088"></a> |
| <span class="sourceLineNo">2089</span> regionNode.setRegionLocation(null);<a name="line.2089"></a> |
| <span class="sourceLineNo">2090</span> CompletableFuture<Void> future = regionStateStore.updateRegionLocation(regionNode);<a name="line.2090"></a> |
| <span class="sourceLineNo">2091</span> FutureUtils.addListener(future, (r, e) -> {<a name="line.2091"></a> |
| <span class="sourceLineNo">2092</span> if (e == null) {<a name="line.2092"></a> |
| <span class="sourceLineNo">2093</span> if (regionLocation != null) {<a name="line.2093"></a> |
| <span class="sourceLineNo">2094</span> regionStates.removeRegionFromServer(regionLocation, regionNode);<a name="line.2094"></a> |
| <span class="sourceLineNo">2095</span> }<a name="line.2095"></a> |
| <span class="sourceLineNo">2096</span> } else {<a name="line.2096"></a> |
| <span class="sourceLineNo">2097</span> // revert<a name="line.2097"></a> |
| <span class="sourceLineNo">2098</span> regionNode.setState(state);<a name="line.2098"></a> |
| <span class="sourceLineNo">2099</span> regionNode.setRegionLocation(regionLocation);<a name="line.2099"></a> |
| <span class="sourceLineNo">2100</span> }<a name="line.2100"></a> |
| <span class="sourceLineNo">2101</span> });<a name="line.2101"></a> |
| <span class="sourceLineNo">2102</span> return future;<a name="line.2102"></a> |
| <span class="sourceLineNo">2103</span> }<a name="line.2103"></a> |
| <span class="sourceLineNo">2104</span><a name="line.2104"></a> |
| <span class="sourceLineNo">2105</span> // should be called under the RegionStateNode lock<a name="line.2105"></a> |
| <span class="sourceLineNo">2106</span> CompletableFuture<Void> regionClosing(RegionStateNode regionNode) {<a name="line.2106"></a> |
| <span class="sourceLineNo">2107</span> return transitStateAndUpdate(regionNode, State.CLOSING, STATES_EXPECTED_ON_CLOSING)<a name="line.2107"></a> |
| <span class="sourceLineNo">2108</span> .thenAccept(r -> {<a name="line.2108"></a> |
| <span class="sourceLineNo">2109</span> RegionInfo hri = regionNode.getRegionInfo();<a name="line.2109"></a> |
| <span class="sourceLineNo">2110</span> // Mark meta as not assigned early, so people trying to create/edit tables will wait<a name="line.2110"></a> |
| <span class="sourceLineNo">2111</span> if (isMetaRegion(hri)) {<a name="line.2111"></a> |
| <span class="sourceLineNo">2112</span> setMetaAssigned(hri, false);<a name="line.2112"></a> |
| <span class="sourceLineNo">2113</span> }<a name="line.2113"></a> |
| <span class="sourceLineNo">2114</span> // update the operation count metrics<a name="line.2114"></a> |
| <span class="sourceLineNo">2115</span> metrics.incrementOperationCounter();<a name="line.2115"></a> |
| <span class="sourceLineNo">2116</span> });<a name="line.2116"></a> |
| <span class="sourceLineNo">2117</span> }<a name="line.2117"></a> |
| <span class="sourceLineNo">2118</span><a name="line.2118"></a> |
| <span class="sourceLineNo">2119</span> // for open and close, they will first be persisted to the procedure store in<a name="line.2119"></a> |
| <span class="sourceLineNo">2120</span> // RegionRemoteProcedureBase. So here we will first change the in memory state as it is considered<a name="line.2120"></a> |
| <span class="sourceLineNo">2121</span> // as succeeded if the persistence to procedure store succeeds, and then when the<a name="line.2121"></a> |
| <span class="sourceLineNo">2122</span> // RegionRemoteProcedureBase is woken up, we will persist the RegionStateNode to hbase:meta.<a name="line.2122"></a> |
| <span class="sourceLineNo">2123</span><a name="line.2123"></a> |
| <span class="sourceLineNo">2124</span> // should be called under the RegionStateNode lock<a name="line.2124"></a> |
| <span class="sourceLineNo">2125</span> void regionOpenedWithoutPersistingToMeta(RegionStateNode regionNode)<a name="line.2125"></a> |
| <span class="sourceLineNo">2126</span> throws UnexpectedStateException {<a name="line.2126"></a> |
| <span class="sourceLineNo">2127</span> regionNode.transitionState(State.OPEN, STATES_EXPECTED_ON_OPEN);<a name="line.2127"></a> |
| <span class="sourceLineNo">2128</span> RegionInfo regionInfo = regionNode.getRegionInfo();<a name="line.2128"></a> |
| <span class="sourceLineNo">2129</span> regionStates.addRegionToServer(regionNode);<a name="line.2129"></a> |
| <span class="sourceLineNo">2130</span> regionStates.removeFromFailedOpen(regionInfo);<a name="line.2130"></a> |
| <span class="sourceLineNo">2131</span> }<a name="line.2131"></a> |
| <span class="sourceLineNo">2132</span><a name="line.2132"></a> |
| <span class="sourceLineNo">2133</span> // should be called under the RegionStateNode lock<a name="line.2133"></a> |
| <span class="sourceLineNo">2134</span> void regionClosedWithoutPersistingToMeta(RegionStateNode regionNode)<a name="line.2134"></a> |
| <span class="sourceLineNo">2135</span> throws UnexpectedStateException {<a name="line.2135"></a> |
| <span class="sourceLineNo">2136</span> ServerName regionLocation = regionNode.getRegionLocation();<a name="line.2136"></a> |
| <span class="sourceLineNo">2137</span> regionNode.transitionState(State.CLOSED, STATES_EXPECTED_ON_CLOSED);<a name="line.2137"></a> |
| <span class="sourceLineNo">2138</span> regionNode.setRegionLocation(null);<a name="line.2138"></a> |
| <span class="sourceLineNo">2139</span> if (regionLocation != null) {<a name="line.2139"></a> |
| <span class="sourceLineNo">2140</span> regionNode.setLastHost(regionLocation);<a name="line.2140"></a> |
| <span class="sourceLineNo">2141</span> regionStates.removeRegionFromServer(regionLocation, regionNode);<a name="line.2141"></a> |
| <span class="sourceLineNo">2142</span> }<a name="line.2142"></a> |
| <span class="sourceLineNo">2143</span> }<a name="line.2143"></a> |
| <span class="sourceLineNo">2144</span><a name="line.2144"></a> |
| <span class="sourceLineNo">2145</span> // should be called under the RegionStateNode lock<a name="line.2145"></a> |
| <span class="sourceLineNo">2146</span> CompletableFuture<Void> persistToMeta(RegionStateNode regionNode) {<a name="line.2146"></a> |
| <span class="sourceLineNo">2147</span> return regionStateStore.updateRegionLocation(regionNode).thenAccept(r -> {<a name="line.2147"></a> |
| <span class="sourceLineNo">2148</span> RegionInfo regionInfo = regionNode.getRegionInfo();<a name="line.2148"></a> |
| <span class="sourceLineNo">2149</span> if (isMetaRegion(regionInfo) && regionNode.getState() == State.OPEN) {<a name="line.2149"></a> |
| <span class="sourceLineNo">2150</span> // Usually we'd set a table ENABLED at this stage but hbase:meta is ALWAYs enabled, it<a name="line.2150"></a> |
| <span class="sourceLineNo">2151</span> // can't be disabled -- so skip the RPC (besides... enabled is managed by TableStateManager<a name="line.2151"></a> |
| <span class="sourceLineNo">2152</span> // which is backed by hbase:meta... Avoid setting ENABLED to avoid having to update state<a name="line.2152"></a> |
| <span class="sourceLineNo">2153</span> // on table that contains state.<a name="line.2153"></a> |
| <span class="sourceLineNo">2154</span> setMetaAssigned(regionInfo, true);<a name="line.2154"></a> |
| <span class="sourceLineNo">2155</span> }<a name="line.2155"></a> |
| <span class="sourceLineNo">2156</span> });<a name="line.2156"></a> |
| <span class="sourceLineNo">2157</span> }<a name="line.2157"></a> |
| <span class="sourceLineNo">2158</span><a name="line.2158"></a> |
| <span class="sourceLineNo">2159</span> // should be called under the RegionStateNode lock<a name="line.2159"></a> |
| <span class="sourceLineNo">2160</span> // for SCP<a name="line.2160"></a> |
| <span class="sourceLineNo">2161</span> public CompletableFuture<Void> regionClosedAbnormally(RegionStateNode regionNode) {<a name="line.2161"></a> |
| <span class="sourceLineNo">2162</span> RegionState.State state = regionNode.getState();<a name="line.2162"></a> |
| <span class="sourceLineNo">2163</span> ServerName regionLocation = regionNode.getRegionLocation();<a name="line.2163"></a> |
| <span class="sourceLineNo">2164</span> regionNode.setState(State.ABNORMALLY_CLOSED);<a name="line.2164"></a> |
| <span class="sourceLineNo">2165</span> regionNode.setRegionLocation(null);<a name="line.2165"></a> |
| <span class="sourceLineNo">2166</span> CompletableFuture<Void> future = regionStateStore.updateRegionLocation(regionNode);<a name="line.2166"></a> |
| <span class="sourceLineNo">2167</span> FutureUtils.addListener(future, (r, e) -> {<a name="line.2167"></a> |
| <span class="sourceLineNo">2168</span> if (e == null) {<a name="line.2168"></a> |
| <span class="sourceLineNo">2169</span> if (regionLocation != null) {<a name="line.2169"></a> |
| <span class="sourceLineNo">2170</span> regionNode.setLastHost(regionLocation);<a name="line.2170"></a> |
| <span class="sourceLineNo">2171</span> regionStates.removeRegionFromServer(regionLocation, regionNode);<a name="line.2171"></a> |
| <span class="sourceLineNo">2172</span> }<a name="line.2172"></a> |
| <span class="sourceLineNo">2173</span> } else {<a name="line.2173"></a> |
| <span class="sourceLineNo">2174</span> // revert<a name="line.2174"></a> |
| <span class="sourceLineNo">2175</span> regionNode.setState(state);<a name="line.2175"></a> |
| <span class="sourceLineNo">2176</span> regionNode.setRegionLocation(regionLocation);<a name="line.2176"></a> |
| <span class="sourceLineNo">2177</span> }<a name="line.2177"></a> |
| <span class="sourceLineNo">2178</span> });<a name="line.2178"></a> |
| <span class="sourceLineNo">2179</span> return future;<a name="line.2179"></a> |
| <span class="sourceLineNo">2180</span> }<a name="line.2180"></a> |
| <span class="sourceLineNo">2181</span><a name="line.2181"></a> |
| <span class="sourceLineNo">2182</span> // ============================================================================================<a name="line.2182"></a> |
| <span class="sourceLineNo">2183</span> // The above methods can only be called in TransitRegionStateProcedure(and related procedures)<a name="line.2183"></a> |
| <span class="sourceLineNo">2184</span> // ============================================================================================<a name="line.2184"></a> |
| <span class="sourceLineNo">2185</span><a name="line.2185"></a> |
| <span class="sourceLineNo">2186</span> public void markRegionAsSplit(final RegionInfo parent, final ServerName serverName,<a name="line.2186"></a> |
| <span class="sourceLineNo">2187</span> final RegionInfo daughterA, final RegionInfo daughterB) throws IOException {<a name="line.2187"></a> |
| <span class="sourceLineNo">2188</span> // Update hbase:meta. Parent will be marked offline and split up in hbase:meta.<a name="line.2188"></a> |
| <span class="sourceLineNo">2189</span> // The parent stays in regionStates until cleared when removed by CatalogJanitor.<a name="line.2189"></a> |
| <span class="sourceLineNo">2190</span> // Update its state in regionStates to it shows as offline and split when read<a name="line.2190"></a> |
| <span class="sourceLineNo">2191</span> // later figuring what regions are in a table and what are not: see<a name="line.2191"></a> |
| <span class="sourceLineNo">2192</span> // regionStates#getRegionsOfTable<a name="line.2192"></a> |
| <span class="sourceLineNo">2193</span> final RegionStateNode node = regionStates.getOrCreateRegionStateNode(parent);<a name="line.2193"></a> |
| <span class="sourceLineNo">2194</span> node.setState(State.SPLIT);<a name="line.2194"></a> |
| <span class="sourceLineNo">2195</span> final RegionStateNode nodeA = regionStates.getOrCreateRegionStateNode(daughterA);<a name="line.2195"></a> |
| <span class="sourceLineNo">2196</span> nodeA.setState(State.SPLITTING_NEW);<a name="line.2196"></a> |
| <span class="sourceLineNo">2197</span> final RegionStateNode nodeB = regionStates.getOrCreateRegionStateNode(daughterB);<a name="line.2197"></a> |
| <span class="sourceLineNo">2198</span> nodeB.setState(State.SPLITTING_NEW);<a name="line.2198"></a> |
| <span class="sourceLineNo">2199</span><a name="line.2199"></a> |
| <span class="sourceLineNo">2200</span> TableDescriptor td = master.getTableDescriptors().get(parent.getTable());<a name="line.2200"></a> |
| <span class="sourceLineNo">2201</span> // TODO: here we just update the parent region info in meta, to set split and offline to true,<a name="line.2201"></a> |
| <span class="sourceLineNo">2202</span> // without changing the one in the region node. This is a bit confusing but the region info<a name="line.2202"></a> |
| <span class="sourceLineNo">2203</span> // field in RegionStateNode is not expected to be changed in the current design. Need to find a<a name="line.2203"></a> |
| <span class="sourceLineNo">2204</span> // possible way to address this problem, or at least adding more comments about the trick to<a name="line.2204"></a> |
| <span class="sourceLineNo">2205</span> // deal with this problem, that when you want to filter out split parent, you need to check both<a name="line.2205"></a> |
| <span class="sourceLineNo">2206</span> // the RegionState on whether it is split, and also the region info. If one of them matches then<a name="line.2206"></a> |
| <span class="sourceLineNo">2207</span> // it is a split parent. And usually only one of them can match, as after restart, the region<a name="line.2207"></a> |
| <span class="sourceLineNo">2208</span> // state will be changed from SPLIT to CLOSED.<a name="line.2208"></a> |
| <span class="sourceLineNo">2209</span> regionStateStore.splitRegion(parent, daughterA, daughterB, serverName, td);<a name="line.2209"></a> |
| <span class="sourceLineNo">2210</span> if (shouldAssignFavoredNodes(parent)) {<a name="line.2210"></a> |
| <span class="sourceLineNo">2211</span> List<ServerName> onlineServers = this.master.getServerManager().getOnlineServersList();<a name="line.2211"></a> |
| <span class="sourceLineNo">2212</span> getFavoredNodePromoter().generateFavoredNodesForDaughter(onlineServers, parent, daughterA,<a name="line.2212"></a> |
| <span class="sourceLineNo">2213</span> daughterB);<a name="line.2213"></a> |
| <span class="sourceLineNo">2214</span> }<a name="line.2214"></a> |
| <span class="sourceLineNo">2215</span> }<a name="line.2215"></a> |
| <span class="sourceLineNo">2216</span><a name="line.2216"></a> |
| <span class="sourceLineNo">2217</span> /**<a name="line.2217"></a> |
| <span class="sourceLineNo">2218</span> * When called here, the merge has happened. The merged regions have been unassigned and the above<a name="line.2218"></a> |
| <span class="sourceLineNo">2219</span> * markRegionClosed has been called on each so they have been disassociated from a hosting Server.<a name="line.2219"></a> |
| <span class="sourceLineNo">2220</span> * The merged region will be open after this call. The merged regions are removed from hbase:meta<a name="line.2220"></a> |
| <span class="sourceLineNo">2221</span> * below. Later they are deleted from the filesystem by the catalog janitor running against<a name="line.2221"></a> |
| <span class="sourceLineNo">2222</span> * hbase:meta. It notices when the merged region no longer holds references to the old regions<a name="line.2222"></a> |
| <span class="sourceLineNo">2223</span> * (References are deleted after a compaction rewrites what the Reference points at but not until<a name="line.2223"></a> |
| <span class="sourceLineNo">2224</span> * the archiver chore runs, are the References removed).<a name="line.2224"></a> |
| <span class="sourceLineNo">2225</span> */<a name="line.2225"></a> |
| <span class="sourceLineNo">2226</span> public void markRegionAsMerged(final RegionInfo child, final ServerName serverName,<a name="line.2226"></a> |
| <span class="sourceLineNo">2227</span> RegionInfo[] mergeParents) throws IOException {<a name="line.2227"></a> |
| <span class="sourceLineNo">2228</span> final RegionStateNode node = regionStates.getOrCreateRegionStateNode(child);<a name="line.2228"></a> |
| <span class="sourceLineNo">2229</span> node.setState(State.MERGED);<a name="line.2229"></a> |
| <span class="sourceLineNo">2230</span> for (RegionInfo ri : mergeParents) {<a name="line.2230"></a> |
| <span class="sourceLineNo">2231</span> regionStates.deleteRegion(ri);<a name="line.2231"></a> |
| <span class="sourceLineNo">2232</span> }<a name="line.2232"></a> |
| <span class="sourceLineNo">2233</span> TableDescriptor td = master.getTableDescriptors().get(child.getTable());<a name="line.2233"></a> |
| <span class="sourceLineNo">2234</span> regionStateStore.mergeRegions(child, mergeParents, serverName, td);<a name="line.2234"></a> |
| <span class="sourceLineNo">2235</span> if (shouldAssignFavoredNodes(child)) {<a name="line.2235"></a> |
| <span class="sourceLineNo">2236</span> getFavoredNodePromoter().generateFavoredNodesForMergedRegion(child, mergeParents);<a name="line.2236"></a> |
| <span class="sourceLineNo">2237</span> }<a name="line.2237"></a> |
| <span class="sourceLineNo">2238</span> }<a name="line.2238"></a> |
| <span class="sourceLineNo">2239</span><a name="line.2239"></a> |
| <span class="sourceLineNo">2240</span> /*<a name="line.2240"></a> |
| <span class="sourceLineNo">2241</span> * Favored nodes should be applied only when FavoredNodes balancer is configured and the region<a name="line.2241"></a> |
| <span class="sourceLineNo">2242</span> * belongs to a non-system table.<a name="line.2242"></a> |
| <span class="sourceLineNo">2243</span> */<a name="line.2243"></a> |
| <span class="sourceLineNo">2244</span> private boolean shouldAssignFavoredNodes(RegionInfo region) {<a name="line.2244"></a> |
| <span class="sourceLineNo">2245</span> return this.shouldAssignRegionsWithFavoredNodes<a name="line.2245"></a> |
| <span class="sourceLineNo">2246</span> && FavoredNodesManager.isFavoredNodeApplicable(region);<a name="line.2246"></a> |
| <span class="sourceLineNo">2247</span> }<a name="line.2247"></a> |
| <span class="sourceLineNo">2248</span><a name="line.2248"></a> |
| <span class="sourceLineNo">2249</span> // ============================================================================================<a name="line.2249"></a> |
| <span class="sourceLineNo">2250</span> // Assign Queue (Assign/Balance)<a name="line.2250"></a> |
| <span class="sourceLineNo">2251</span> // ============================================================================================<a name="line.2251"></a> |
| <span class="sourceLineNo">2252</span> private final ArrayList<RegionStateNode> pendingAssignQueue = new ArrayList<RegionStateNode>();<a name="line.2252"></a> |
| <span class="sourceLineNo">2253</span> private final ReentrantLock assignQueueLock = new ReentrantLock();<a name="line.2253"></a> |
| <span class="sourceLineNo">2254</span> private final Condition assignQueueFullCond = assignQueueLock.newCondition();<a name="line.2254"></a> |
| <span class="sourceLineNo">2255</span><a name="line.2255"></a> |
| <span class="sourceLineNo">2256</span> /**<a name="line.2256"></a> |
| <span class="sourceLineNo">2257</span> * Add the assign operation to the assignment queue. The pending assignment operation will be<a name="line.2257"></a> |
| <span class="sourceLineNo">2258</span> * processed, and each region will be assigned by a server using the balancer.<a name="line.2258"></a> |
| <span class="sourceLineNo">2259</span> */<a name="line.2259"></a> |
| <span class="sourceLineNo">2260</span> protected void queueAssign(final RegionStateNode regionNode) {<a name="line.2260"></a> |
| <span class="sourceLineNo">2261</span> regionNode.getProcedureEvent().suspend();<a name="line.2261"></a> |
| <span class="sourceLineNo">2262</span><a name="line.2262"></a> |
| <span class="sourceLineNo">2263</span> // TODO: quick-start for meta and the other sys-tables?<a name="line.2263"></a> |
| <span class="sourceLineNo">2264</span> assignQueueLock.lock();<a name="line.2264"></a> |
| <span class="sourceLineNo">2265</span> try {<a name="line.2265"></a> |
| <span class="sourceLineNo">2266</span> pendingAssignQueue.add(regionNode);<a name="line.2266"></a> |
| <span class="sourceLineNo">2267</span> if (<a name="line.2267"></a> |
| <span class="sourceLineNo">2268</span> regionNode.isSystemTable() || pendingAssignQueue.size() == 1<a name="line.2268"></a> |
| <span class="sourceLineNo">2269</span> || pendingAssignQueue.size() >= assignDispatchWaitQueueMaxSize<a name="line.2269"></a> |
| <span class="sourceLineNo">2270</span> ) {<a name="line.2270"></a> |
| <span class="sourceLineNo">2271</span> assignQueueFullCond.signal();<a name="line.2271"></a> |
| <span class="sourceLineNo">2272</span> }<a name="line.2272"></a> |
| <span class="sourceLineNo">2273</span> } finally {<a name="line.2273"></a> |
| <span class="sourceLineNo">2274</span> assignQueueLock.unlock();<a name="line.2274"></a> |
| <span class="sourceLineNo">2275</span> }<a name="line.2275"></a> |
| <span class="sourceLineNo">2276</span> }<a name="line.2276"></a> |
| <span class="sourceLineNo">2277</span><a name="line.2277"></a> |
| <span class="sourceLineNo">2278</span> private void startAssignmentThread() {<a name="line.2278"></a> |
| <span class="sourceLineNo">2279</span> assignThread = new Thread(master.getServerName().toShortString()) {<a name="line.2279"></a> |
| <span class="sourceLineNo">2280</span> @Override<a name="line.2280"></a> |
| <span class="sourceLineNo">2281</span> public void run() {<a name="line.2281"></a> |
| <span class="sourceLineNo">2282</span> while (isRunning()) {<a name="line.2282"></a> |
| <span class="sourceLineNo">2283</span> processAssignQueue();<a name="line.2283"></a> |
| <span class="sourceLineNo">2284</span> }<a name="line.2284"></a> |
| <span class="sourceLineNo">2285</span> pendingAssignQueue.clear();<a name="line.2285"></a> |
| <span class="sourceLineNo">2286</span> }<a name="line.2286"></a> |
| <span class="sourceLineNo">2287</span> };<a name="line.2287"></a> |
| <span class="sourceLineNo">2288</span> assignThread.setDaemon(true);<a name="line.2288"></a> |
| <span class="sourceLineNo">2289</span> assignThread.start();<a name="line.2289"></a> |
| <span class="sourceLineNo">2290</span> }<a name="line.2290"></a> |
| <span class="sourceLineNo">2291</span><a name="line.2291"></a> |
| <span class="sourceLineNo">2292</span> private void stopAssignmentThread() {<a name="line.2292"></a> |
| <span class="sourceLineNo">2293</span> assignQueueSignal();<a name="line.2293"></a> |
| <span class="sourceLineNo">2294</span> try {<a name="line.2294"></a> |
| <span class="sourceLineNo">2295</span> while (assignThread.isAlive()) {<a name="line.2295"></a> |
| <span class="sourceLineNo">2296</span> assignQueueSignal();<a name="line.2296"></a> |
| <span class="sourceLineNo">2297</span> assignThread.join(250);<a name="line.2297"></a> |
| <span class="sourceLineNo">2298</span> }<a name="line.2298"></a> |
| <span class="sourceLineNo">2299</span> } catch (InterruptedException e) {<a name="line.2299"></a> |
| <span class="sourceLineNo">2300</span> LOG.warn("join interrupted", e);<a name="line.2300"></a> |
| <span class="sourceLineNo">2301</span> Thread.currentThread().interrupt();<a name="line.2301"></a> |
| <span class="sourceLineNo">2302</span> }<a name="line.2302"></a> |
| <span class="sourceLineNo">2303</span> }<a name="line.2303"></a> |
| <span class="sourceLineNo">2304</span><a name="line.2304"></a> |
| <span class="sourceLineNo">2305</span> private void assignQueueSignal() {<a name="line.2305"></a> |
| <span class="sourceLineNo">2306</span> assignQueueLock.lock();<a name="line.2306"></a> |
| <span class="sourceLineNo">2307</span> try {<a name="line.2307"></a> |
| <span class="sourceLineNo">2308</span> assignQueueFullCond.signal();<a name="line.2308"></a> |
| <span class="sourceLineNo">2309</span> } finally {<a name="line.2309"></a> |
| <span class="sourceLineNo">2310</span> assignQueueLock.unlock();<a name="line.2310"></a> |
| <span class="sourceLineNo">2311</span> }<a name="line.2311"></a> |
| <span class="sourceLineNo">2312</span> }<a name="line.2312"></a> |
| <span class="sourceLineNo">2313</span><a name="line.2313"></a> |
| <span class="sourceLineNo">2314</span> @edu.umd.cs.findbugs.annotations.SuppressWarnings("WA_AWAIT_NOT_IN_LOOP")<a name="line.2314"></a> |
| <span class="sourceLineNo">2315</span> private HashMap<RegionInfo, RegionStateNode> waitOnAssignQueue() {<a name="line.2315"></a> |
| <span class="sourceLineNo">2316</span> HashMap<RegionInfo, RegionStateNode> regions = null;<a name="line.2316"></a> |
| <span class="sourceLineNo">2317</span><a name="line.2317"></a> |
| <span class="sourceLineNo">2318</span> assignQueueLock.lock();<a name="line.2318"></a> |
| <span class="sourceLineNo">2319</span> try {<a name="line.2319"></a> |
| <span class="sourceLineNo">2320</span> if (pendingAssignQueue.isEmpty() && isRunning()) {<a name="line.2320"></a> |
| <span class="sourceLineNo">2321</span> assignQueueFullCond.await();<a name="line.2321"></a> |
| <span class="sourceLineNo">2322</span> }<a name="line.2322"></a> |
| <span class="sourceLineNo">2323</span><a name="line.2323"></a> |
| <span class="sourceLineNo">2324</span> if (!isRunning()) {<a name="line.2324"></a> |
| <span class="sourceLineNo">2325</span> return null;<a name="line.2325"></a> |
| <span class="sourceLineNo">2326</span> }<a name="line.2326"></a> |
| <span class="sourceLineNo">2327</span> assignQueueFullCond.await(assignDispatchWaitMillis, TimeUnit.MILLISECONDS);<a name="line.2327"></a> |
| <span class="sourceLineNo">2328</span> regions = new HashMap<RegionInfo, RegionStateNode>(pendingAssignQueue.size());<a name="line.2328"></a> |
| <span class="sourceLineNo">2329</span> for (RegionStateNode regionNode : pendingAssignQueue) {<a name="line.2329"></a> |
| <span class="sourceLineNo">2330</span> regions.put(regionNode.getRegionInfo(), regionNode);<a name="line.2330"></a> |
| <span class="sourceLineNo">2331</span> }<a name="line.2331"></a> |
| <span class="sourceLineNo">2332</span> pendingAssignQueue.clear();<a name="line.2332"></a> |
| <span class="sourceLineNo">2333</span> } catch (InterruptedException e) {<a name="line.2333"></a> |
| <span class="sourceLineNo">2334</span> LOG.warn("got interrupted ", e);<a name="line.2334"></a> |
| <span class="sourceLineNo">2335</span> Thread.currentThread().interrupt();<a name="line.2335"></a> |
| <span class="sourceLineNo">2336</span> } finally {<a name="line.2336"></a> |
| <span class="sourceLineNo">2337</span> assignQueueLock.unlock();<a name="line.2337"></a> |
| <span class="sourceLineNo">2338</span> }<a name="line.2338"></a> |
| <span class="sourceLineNo">2339</span> return regions;<a name="line.2339"></a> |
| <span class="sourceLineNo">2340</span> }<a name="line.2340"></a> |
| <span class="sourceLineNo">2341</span><a name="line.2341"></a> |
| <span class="sourceLineNo">2342</span> private void processAssignQueue() {<a name="line.2342"></a> |
| <span class="sourceLineNo">2343</span> final HashMap<RegionInfo, RegionStateNode> regions = waitOnAssignQueue();<a name="line.2343"></a> |
| <span class="sourceLineNo">2344</span> if (regions == null || regions.size() == 0 || !isRunning()) {<a name="line.2344"></a> |
| <span class="sourceLineNo">2345</span> return;<a name="line.2345"></a> |
| <span class="sourceLineNo">2346</span> }<a name="line.2346"></a> |
| <span class="sourceLineNo">2347</span><a name="line.2347"></a> |
| <span class="sourceLineNo">2348</span> if (LOG.isTraceEnabled()) {<a name="line.2348"></a> |
| <span class="sourceLineNo">2349</span> LOG.trace("PROCESS ASSIGN QUEUE regionCount=" + regions.size());<a name="line.2349"></a> |
| <span class="sourceLineNo">2350</span> }<a name="line.2350"></a> |
| <span class="sourceLineNo">2351</span><a name="line.2351"></a> |
| <span class="sourceLineNo">2352</span> // TODO: Optimize balancer. pass a RegionPlan?<a name="line.2352"></a> |
| <span class="sourceLineNo">2353</span> final HashMap<RegionInfo, ServerName> retainMap = new HashMap<>();<a name="line.2353"></a> |
| <span class="sourceLineNo">2354</span> final List<RegionInfo> userHRIs = new ArrayList<>(regions.size());<a name="line.2354"></a> |
| <span class="sourceLineNo">2355</span> // Regions for system tables requiring reassignment<a name="line.2355"></a> |
| <span class="sourceLineNo">2356</span> final List<RegionInfo> systemHRIs = new ArrayList<>();<a name="line.2356"></a> |
| <span class="sourceLineNo">2357</span> for (RegionStateNode regionStateNode : regions.values()) {<a name="line.2357"></a> |
| <span class="sourceLineNo">2358</span> boolean sysTable = regionStateNode.isSystemTable();<a name="line.2358"></a> |
| <span class="sourceLineNo">2359</span> final List<RegionInfo> hris = sysTable ? systemHRIs : userHRIs;<a name="line.2359"></a> |
| <span class="sourceLineNo">2360</span> if (regionStateNode.getRegionLocation() != null) {<a name="line.2360"></a> |
| <span class="sourceLineNo">2361</span> retainMap.put(regionStateNode.getRegionInfo(), regionStateNode.getRegionLocation());<a name="line.2361"></a> |
| <span class="sourceLineNo">2362</span> } else {<a name="line.2362"></a> |
| <span class="sourceLineNo">2363</span> hris.add(regionStateNode.getRegionInfo());<a name="line.2363"></a> |
| <span class="sourceLineNo">2364</span> }<a name="line.2364"></a> |
| <span class="sourceLineNo">2365</span> }<a name="line.2365"></a> |
| <span class="sourceLineNo">2366</span><a name="line.2366"></a> |
| <span class="sourceLineNo">2367</span> // TODO: connect with the listener to invalidate the cache<a name="line.2367"></a> |
| <span class="sourceLineNo">2368</span><a name="line.2368"></a> |
| <span class="sourceLineNo">2369</span> // TODO use events<a name="line.2369"></a> |
| <span class="sourceLineNo">2370</span> List<ServerName> servers = master.getServerManager().createDestinationServersList();<a name="line.2370"></a> |
| <span class="sourceLineNo">2371</span> for (int i = 0; servers.size() < 1; ++i) {<a name="line.2371"></a> |
| <span class="sourceLineNo">2372</span> // Report every fourth time around this loop; try not to flood log.<a name="line.2372"></a> |
| <span class="sourceLineNo">2373</span> if (i % 4 == 0) {<a name="line.2373"></a> |
| <span class="sourceLineNo">2374</span> LOG.warn("No servers available; cannot place " + regions.size() + " unassigned regions.");<a name="line.2374"></a> |
| <span class="sourceLineNo">2375</span> }<a name="line.2375"></a> |
| <span class="sourceLineNo">2376</span><a name="line.2376"></a> |
| <span class="sourceLineNo">2377</span> if (!isRunning()) {<a name="line.2377"></a> |
| <span class="sourceLineNo">2378</span> LOG.debug("Stopped! Dropping assign of " + regions.size() + " queued regions.");<a name="line.2378"></a> |
| <span class="sourceLineNo">2379</span> return;<a name="line.2379"></a> |
| <span class="sourceLineNo">2380</span> }<a name="line.2380"></a> |
| <span class="sourceLineNo">2381</span> Threads.sleep(250);<a name="line.2381"></a> |
| <span class="sourceLineNo">2382</span> servers = master.getServerManager().createDestinationServersList();<a name="line.2382"></a> |
| <span class="sourceLineNo">2383</span> }<a name="line.2383"></a> |
| <span class="sourceLineNo">2384</span><a name="line.2384"></a> |
| <span class="sourceLineNo">2385</span> if (!systemHRIs.isEmpty()) {<a name="line.2385"></a> |
| <span class="sourceLineNo">2386</span> // System table regions requiring reassignment are present, get region servers<a name="line.2386"></a> |
| <span class="sourceLineNo">2387</span> // not available for system table regions<a name="line.2387"></a> |
| <span class="sourceLineNo">2388</span> final List<ServerName> excludeServers = getExcludedServersForSystemTable();<a name="line.2388"></a> |
| <span class="sourceLineNo">2389</span> List<ServerName> serversForSysTables =<a name="line.2389"></a> |
| <span class="sourceLineNo">2390</span> servers.stream().filter(s -> !excludeServers.contains(s)).collect(Collectors.toList());<a name="line.2390"></a> |
| <span class="sourceLineNo">2391</span> if (serversForSysTables.isEmpty()) {<a name="line.2391"></a> |
| <span class="sourceLineNo">2392</span> LOG.warn("Filtering old server versions and the excluded produced an empty set; "<a name="line.2392"></a> |
| <span class="sourceLineNo">2393</span> + "instead considering all candidate servers!");<a name="line.2393"></a> |
| <span class="sourceLineNo">2394</span> }<a name="line.2394"></a> |
| <span class="sourceLineNo">2395</span> LOG.debug("Processing assignQueue; systemServersCount=" + serversForSysTables.size()<a name="line.2395"></a> |
| <span class="sourceLineNo">2396</span> + ", allServersCount=" + servers.size());<a name="line.2396"></a> |
| <span class="sourceLineNo">2397</span> processAssignmentPlans(regions, null, systemHRIs,<a name="line.2397"></a> |
| <span class="sourceLineNo">2398</span> serversForSysTables.isEmpty() && !containsBogusAssignments(regions, systemHRIs)<a name="line.2398"></a> |
| <span class="sourceLineNo">2399</span> ? servers<a name="line.2399"></a> |
| <span class="sourceLineNo">2400</span> : serversForSysTables);<a name="line.2400"></a> |
| <span class="sourceLineNo">2401</span> }<a name="line.2401"></a> |
| <span class="sourceLineNo">2402</span><a name="line.2402"></a> |
| <span class="sourceLineNo">2403</span> processAssignmentPlans(regions, retainMap, userHRIs, servers);<a name="line.2403"></a> |
| <span class="sourceLineNo">2404</span> }<a name="line.2404"></a> |
| <span class="sourceLineNo">2405</span><a name="line.2405"></a> |
| <span class="sourceLineNo">2406</span> private boolean containsBogusAssignments(Map<RegionInfo, RegionStateNode> regions,<a name="line.2406"></a> |
| <span class="sourceLineNo">2407</span> List<RegionInfo> hirs) {<a name="line.2407"></a> |
| <span class="sourceLineNo">2408</span> for (RegionInfo ri : hirs) {<a name="line.2408"></a> |
| <span class="sourceLineNo">2409</span> if (<a name="line.2409"></a> |
| <span class="sourceLineNo">2410</span> regions.get(ri).getRegionLocation() != null<a name="line.2410"></a> |
| <span class="sourceLineNo">2411</span> && regions.get(ri).getRegionLocation().equals(LoadBalancer.BOGUS_SERVER_NAME)<a name="line.2411"></a> |
| <span class="sourceLineNo">2412</span> ) {<a name="line.2412"></a> |
| <span class="sourceLineNo">2413</span> return true;<a name="line.2413"></a> |
| <span class="sourceLineNo">2414</span> }<a name="line.2414"></a> |
| <span class="sourceLineNo">2415</span> }<a name="line.2415"></a> |
| <span class="sourceLineNo">2416</span> return false;<a name="line.2416"></a> |
| <span class="sourceLineNo">2417</span> }<a name="line.2417"></a> |
| <span class="sourceLineNo">2418</span><a name="line.2418"></a> |
| <span class="sourceLineNo">2419</span> private void processAssignmentPlans(final HashMap<RegionInfo, RegionStateNode> regions,<a name="line.2419"></a> |
| <span class="sourceLineNo">2420</span> final HashMap<RegionInfo, ServerName> retainMap, final List<RegionInfo> hris,<a name="line.2420"></a> |
| <span class="sourceLineNo">2421</span> final List<ServerName> servers) {<a name="line.2421"></a> |
| <span class="sourceLineNo">2422</span> final boolean traceOn = LOG.isTraceEnabled();<a name="line.2422"></a> |
| <span class="sourceLineNo">2423</span> if (traceOn) {<a name="line.2423"></a> |
| <span class="sourceLineNo">2424</span> LOG.trace("Available servers count=" + servers.size() + ": " + servers);<a name="line.2424"></a> |
| <span class="sourceLineNo">2425</span> }<a name="line.2425"></a> |
| <span class="sourceLineNo">2426</span><a name="line.2426"></a> |
| <span class="sourceLineNo">2427</span> final LoadBalancer lb = getBalancer();<a name="line.2427"></a> |
| <span class="sourceLineNo">2428</span> // First pass: entries of retainMap (region -> server) go through balancer.retainAssignment.<a name="line.2428"></a> |
| <span class="sourceLineNo">2429</span> final boolean hasRetained = retainMap != null && !retainMap.isEmpty();<a name="line.2429"></a> |
| <span class="sourceLineNo">2430</span> if (hasRetained) {<a name="line.2430"></a> |
| <span class="sourceLineNo">2431</span> if (traceOn) {<a name="line.2431"></a> |
| <span class="sourceLineNo">2432</span> LOG.trace("retain assign regions=" + retainMap);<a name="line.2432"></a> |
| <span class="sourceLineNo">2433</span> }<a name="line.2433"></a> |
| <span class="sourceLineNo">2434</span> try {<a name="line.2434"></a> |
| <span class="sourceLineNo">2435</span> acceptPlan(regions, lb.retainAssignment(retainMap, servers));<a name="line.2435"></a> |
| <span class="sourceLineNo">2436</span> } catch (IOException e) { // no usable plan: put these regions back on the pending queue<a name="line.2436"></a> |
| <span class="sourceLineNo">2437</span> LOG.warn("unable to retain assignment", e);<a name="line.2437"></a> |
| <span class="sourceLineNo">2438</span> addToPendingAssignment(regions, retainMap.keySet());<a name="line.2438"></a> |
| <span class="sourceLineNo">2439</span> }<a name="line.2439"></a> |
| <span class="sourceLineNo">2440</span> }<a name="line.2440"></a> |
| <span class="sourceLineNo">2441</span> // TODO: Do we need to split retain and round-robin?<a name="line.2441"></a> |
| <span class="sourceLineNo">2442</span> // the retain seems to fallback to round-robin/random if the region is not in the map.<a name="line.2442"></a> |
| <span class="sourceLineNo">2443</span> if (!hris.isEmpty()) {<a name="line.2443"></a> |
| <span class="sourceLineNo">2444</span> hris.sort(RegionInfo.COMPARATOR); // sorts the caller's list in place<a name="line.2444"></a> |
| <span class="sourceLineNo">2445</span> if (traceOn) {<a name="line.2445"></a> |
| <span class="sourceLineNo">2446</span> LOG.trace("round robin regions=" + hris);<a name="line.2446"></a> |
| <span class="sourceLineNo">2447</span> }<a name="line.2447"></a> |
| <span class="sourceLineNo">2448</span> try {<a name="line.2448"></a> |
| <span class="sourceLineNo">2449</span> acceptPlan(regions, lb.roundRobinAssignment(hris, servers));<a name="line.2449"></a> |
| <span class="sourceLineNo">2450</span> } catch (IOException e) { // same fallback as above, for the round-robin regions<a name="line.2450"></a> |
| <span class="sourceLineNo">2451</span> LOG.warn("unable to round-robin assignment", e);<a name="line.2451"></a> |
| <span class="sourceLineNo">2452</span> addToPendingAssignment(regions, hris);<a name="line.2452"></a> |
| <span class="sourceLineNo">2453</span> }<a name="line.2453"></a> |
| <span class="sourceLineNo">2454</span> }<a name="line.2454"></a> |
| <span class="sourceLineNo">2455</span> }<a name="line.2455"></a> |
| <span class="sourceLineNo">2456</span><a name="line.2456"></a> |
| <span class="sourceLineNo">2457</span> private void acceptPlan(final HashMap<RegionInfo, RegionStateNode> regions,<a name="line.2457"></a> |
| <span class="sourceLineNo">2458</span> final Map<ServerName, List<RegionInfo>> plan) throws HBaseIOException {<a name="line.2458"></a> |
| <span class="sourceLineNo">2459</span> final ProcedureEvent<?>[] events = new ProcedureEvent[regions.size()];<a name="line.2459"></a> |
| <span class="sourceLineNo">2460</span> final long st = EnvironmentEdgeManager.currentTime();<a name="line.2460"></a> |
| <span class="sourceLineNo">2461</span> // An empty plan places nothing: fail here and let the caller decide what to do with the regions.<a name="line.2461"></a> |
| <span class="sourceLineNo">2462</span> if (plan.isEmpty()) {<a name="line.2462"></a> |
| <span class="sourceLineNo">2463</span> throw new HBaseIOException("unable to compute plans for regions=" + regions.size());<a name="line.2463"></a> |
| <span class="sourceLineNo">2464</span> }<a name="line.2464"></a> |
| <span class="sourceLineNo">2465</span> // Record the planned server on every region node and collect the procedure events to wake.<a name="line.2465"></a> |
| <span class="sourceLineNo">2466</span> int evcount = 0;<a name="line.2466"></a> |
| <span class="sourceLineNo">2467</span> for (Map.Entry<ServerName, List<RegionInfo>> entry : plan.entrySet()) {<a name="line.2467"></a> |
| <span class="sourceLineNo">2468</span> final ServerName server = entry.getKey();<a name="line.2468"></a> |
| <span class="sourceLineNo">2469</span> for (RegionInfo hri : entry.getValue()) {<a name="line.2469"></a> |
| <span class="sourceLineNo">2470</span> final RegionStateNode regionNode = regions.get(hri);<a name="line.2470"></a> |
| <span class="sourceLineNo">2471</span> regionNode.setRegionLocation(server);<a name="line.2471"></a> |
| <span class="sourceLineNo">2472</span> if (server.equals(LoadBalancer.BOGUS_SERVER_NAME) && regionNode.isSystemTable()) { // re-queue instead of waking<a name="line.2472"></a> |
| <span class="sourceLineNo">2473</span> assignQueueLock.lock();<a name="line.2473"></a> |
| <span class="sourceLineNo">2474</span> try {<a name="line.2474"></a> |
| <span class="sourceLineNo">2475</span> pendingAssignQueue.add(regionNode);<a name="line.2475"></a> |
| <span class="sourceLineNo">2476</span> } finally {<a name="line.2476"></a> |
| <span class="sourceLineNo">2477</span> assignQueueLock.unlock();<a name="line.2477"></a> |
| <span class="sourceLineNo">2478</span> }<a name="line.2478"></a> |
| <span class="sourceLineNo">2479</span> } else {<a name="line.2479"></a> |
| <span class="sourceLineNo">2480</span> events[evcount++] = regionNode.getProcedureEvent();<a name="line.2480"></a> |
| <span class="sourceLineNo">2481</span> }<a name="line.2481"></a> |
| <span class="sourceLineNo">2482</span> }<a name="line.2482"></a> |
| <span class="sourceLineNo">2483</span> }<a name="line.2483"></a> |
| <span class="sourceLineNo">2484</span> ProcedureEvent.wakeEvents(getProcedureScheduler(), events);<a name="line.2484"></a> |
| <span class="sourceLineNo">2485</span> // events is sized by regions.size() and may have unused slots, so report evcount, not the array length.<a name="line.2485"></a> |
| <span class="sourceLineNo">2486</span> final long et = EnvironmentEdgeManager.currentTime();<a name="line.2486"></a> |
| <span class="sourceLineNo">2487</span> if (LOG.isTraceEnabled()) {<a name="line.2487"></a> |
| <span class="sourceLineNo">2488</span> LOG.trace("ASSIGN ACCEPT " + evcount + " -> " + StringUtils.humanTimeDiff(et - st));<a name="line.2488"></a> |
| <span class="sourceLineNo">2489</span> }<a name="line.2489"></a> |
| <span class="sourceLineNo">2490</span> }<a name="line.2490"></a> |
| <span class="sourceLineNo">2491</span><a name="line.2491"></a> |
| <span class="sourceLineNo">2492</span> private void addToPendingAssignment(final HashMap<RegionInfo, RegionStateNode> regions,<a name="line.2492"></a> |
| <span class="sourceLineNo">2493</span> final Collection<RegionInfo> pendingRegions) {<a name="line.2493"></a> |
| <span class="sourceLineNo">2494</span> // Hold assignQueueLock while the nodes are appended to pendingAssignQueue.<a name="line.2494"></a> |
| <span class="sourceLineNo">2495</span> assignQueueLock.lock();<a name="line.2495"></a> |
| <span class="sourceLineNo">2496</span> try {<a name="line.2496"></a> |
| <span class="sourceLineNo">2497</span> pendingRegions.forEach(info -> pendingAssignQueue.add(regions.get(info)));<a name="line.2497"></a> |
| <span class="sourceLineNo">2498</span> } finally {<a name="line.2498"></a> |
| <span class="sourceLineNo">2499</span> // Release the lock even if a lookup or add throws.<a name="line.2499"></a> |
| <span class="sourceLineNo">2500</span> assignQueueLock.unlock();<a name="line.2500"></a> |
| <span class="sourceLineNo">2501</span> }<a name="line.2501"></a> |
| <span class="sourceLineNo">2502</span> }<a name="line.2502"></a> |
| <span class="sourceLineNo">2503</span><a name="line.2503"></a> |
| <span class="sourceLineNo">2504</span> /**<a name="line.2504"></a> |
| <span class="sourceLineNo">2505</span> * For a given cluster with mixed versions of servers, get a list of servers with lower versions,<a name="line.2505"></a> |
| <span class="sourceLineNo">2506</span> * where system table regions should not be assigned to. For system table, we must assign regions<a name="line.2506"></a> |
| <span class="sourceLineNo">2507</span> * to a server with highest version. However, we can disable this exclusion using config:<a name="line.2507"></a> |
| <span class="sourceLineNo">2508</span> * "hbase.min.version.move.system.tables" if checkForMinVersion is true. Detailed explanation<a name="line.2508"></a> |
| <span class="sourceLineNo">2509</span> * available with definition of minVersionToMoveSysTables.<a name="line.2509"></a> |
| <span class="sourceLineNo">2510</span> * @return List of Excluded servers for System table regions.<a name="line.2510"></a> |
| <span class="sourceLineNo">2511</span> */<a name="line.2511"></a> |
| <span class="sourceLineNo">2512</span> public List<ServerName> getExcludedServersForSystemTable() {<a name="line.2512"></a> |
| <span class="sourceLineNo">2513</span> // TODO: This should be a cached list kept by the ServerManager rather than calculated on each<a name="line.2513"></a> |
| <span class="sourceLineNo">2514</span> // move or system region assign. The RegionServerTracker keeps list of online Servers with<a name="line.2514"></a> |
| <span class="sourceLineNo">2515</span> // RegionServerInfo that includes Version.<a name="line.2515"></a> |
| <span class="sourceLineNo">2516</span> final List<Pair<ServerName, String>> versionedServers =<a name="line.2516"></a> |
| <span class="sourceLineNo">2517</span> master.getServerManager().getOnlineServersList().stream()<a name="line.2517"></a> |
| <span class="sourceLineNo">2518</span> .map(sn -> new Pair<>(sn, master.getRegionServerVersion(sn))).collect(Collectors.toList());<a name="line.2518"></a> |
| <span class="sourceLineNo">2519</span> if (versionedServers.isEmpty()) {<a name="line.2519"></a> |
| <span class="sourceLineNo">2520</span> return new ArrayList<>();<a name="line.2520"></a> |
| <span class="sourceLineNo">2521</span> }<a name="line.2521"></a> |
| <span class="sourceLineNo">2522</span> final String highestVersion = Collections.max(versionedServers,<a name="line.2522"></a> |
| <span class="sourceLineNo">2523</span> (left, right) -> VersionInfo.compareVersion(left.getSecond(), right.getSecond()))<a name="line.2523"></a> |
| <span class="sourceLineNo">2524</span> .getSecond();<a name="line.2524"></a> |
| <span class="sourceLineNo">2525</span> // A non-default minVersionToMoveSysTables that compares above the highest online version excludes nothing.<a name="line.2525"></a> |
| <span class="sourceLineNo">2526</span> if (!DEFAULT_MIN_VERSION_MOVE_SYS_TABLES_CONFIG.equals(minVersionToMoveSysTables)<a name="line.2526"></a> |
| <span class="sourceLineNo">2527</span> && VersionInfo.compareVersion(minVersionToMoveSysTables, highestVersion) > 0) {<a name="line.2527"></a> |
| <span class="sourceLineNo">2528</span> return new ArrayList<>();<a name="line.2528"></a> |
| <span class="sourceLineNo">2529</span> }<a name="line.2529"></a> |
| <span class="sourceLineNo">2530</span> // Exclude every server whose version string differs from the highest one found.<a name="line.2530"></a> |
| <span class="sourceLineNo">2531</span> return versionedServers.stream().filter(entry -> !entry.getSecond().equals(highestVersion))<a name="line.2531"></a> |
| <span class="sourceLineNo">2532</span> .map(Pair::getFirst).collect(Collectors.toList());<a name="line.2532"></a> |
| <span class="sourceLineNo">2533</span> }<a name="line.2533"></a> |
| <span class="sourceLineNo">2534</span><a name="line.2534"></a> |
| <span class="sourceLineNo">2535</span> MasterServices getMaster() { // package-private accessor: no access modifier is declared<a name="line.2535"></a> |
| <span class="sourceLineNo">2536</span> return master; // hands back the master field itself, not a copy<a name="line.2536"></a> |
| <span class="sourceLineNo">2537</span> }<a name="line.2537"></a> |
| <span class="sourceLineNo">2538</span><a name="line.2538"></a> |
| <span class="sourceLineNo">2539</span> /** Returns a snapshot of rsReports */<a name="line.2539"></a> |
| <span class="sourceLineNo">2540</span> public Map<ServerName, Set<byte[]>> getRSReports() {<a name="line.2540"></a> |
| <span class="sourceLineNo">2541</span> final Map<ServerName, Set<byte[]>> copy = new HashMap<>();<a name="line.2541"></a> |
| <span class="sourceLineNo">2542</span> synchronized (rsReports) { // copy while holding the rsReports monitor<a name="line.2542"></a> |
| <span class="sourceLineNo">2543</span> copy.putAll(rsReports); // shallow: the Set values are shared with rsReports, not cloned<a name="line.2543"></a> |
| <span class="sourceLineNo">2544</span> }<a name="line.2544"></a> |
| <span class="sourceLineNo">2545</span> return copy;<a name="line.2545"></a> |
| <span class="sourceLineNo">2546</span> }<a name="line.2546"></a> |
| <span class="sourceLineNo">2547</span><a name="line.2547"></a> |
| <span class="sourceLineNo">2548</span> /**<a name="line.2548"></a> |
| <span class="sourceLineNo">2549</span> * Provide region state counts for the given table, e.g. how many regions of the given table are<a name="line.2549"></a> |
| <span class="sourceLineNo">2550</span> * opened/closed/rit etc<a name="line.2550"></a> |
| <span class="sourceLineNo">2551</span> * @param tableName TableName<a name="line.2551"></a> |
| <span class="sourceLineNo">2552</span> * @return region states count<a name="line.2552"></a> |
| <span class="sourceLineNo">2553</span> */<a name="line.2553"></a> |
| <span class="sourceLineNo">2554</span> public RegionStatesCount getRegionStatesCount(TableName tableName) {<a name="line.2554"></a> |
| <span class="sourceLineNo">2555</span> int opened = 0;<a name="line.2555"></a> |
| <span class="sourceLineNo">2556</span> int closed = 0;<a name="line.2556"></a> |
| <span class="sourceLineNo">2557</span> int inTransition = 0;<a name="line.2557"></a> |
| <span class="sourceLineNo">2558</span> int split = 0;<a name="line.2558"></a> |
| <span class="sourceLineNo">2559</span> int total = 0;<a name="line.2559"></a> |
| <span class="sourceLineNo">2560</span> if (!isTableDisabled(tableName)) { // a disabled table keeps every counter at zero<a name="line.2560"></a> |
| <span class="sourceLineNo">2561</span> final List<RegionState> tableStates = regionStates.getTableRegionStates(tableName);<a name="line.2561"></a> |
| <span class="sourceLineNo">2562</span> for (RegionState current : tableStates) {<a name="line.2562"></a> |
| <span class="sourceLineNo">2563</span> if (current.isOpened()) {<a name="line.2563"></a> |
| <span class="sourceLineNo">2564</span> opened++;<a name="line.2564"></a> |
| <span class="sourceLineNo">2565</span> } else if (current.isClosed()) {<a name="line.2565"></a> |
| <span class="sourceLineNo">2566</span> closed++;<a name="line.2566"></a> |
| <span class="sourceLineNo">2567</span> } else if (current.isSplit()) {<a name="line.2567"></a> |
| <span class="sourceLineNo">2568</span> split++;<a name="line.2568"></a> |
| <span class="sourceLineNo">2569</span> }<a name="line.2569"></a> |
| <span class="sourceLineNo">2570</span> }<a name="line.2570"></a> |
| <span class="sourceLineNo">2571</span> total = tableStates.size();<a name="line.2571"></a> |
| <span class="sourceLineNo">2572</span> inTransition = total - opened - split; // everything that is neither opened nor split, closed included<a name="line.2572"></a> |
| <span class="sourceLineNo">2573</span> }<a name="line.2573"></a> |
| <span class="sourceLineNo">2574</span> return new RegionStatesCount.RegionStatesCountBuilder().setOpenRegions(opened)<a name="line.2574"></a> |
| <span class="sourceLineNo">2575</span> .setClosedRegions(closed).setSplitRegions(split)<a name="line.2575"></a> |
| <span class="sourceLineNo">2576</span> .setRegionsInTransition(inTransition).setTotalRegions(total).build();<a name="line.2576"></a> |
| <span class="sourceLineNo">2577</span> }<a name="line.2577"></a> |
| <span class="sourceLineNo">2578</span><a name="line.2578"></a> |
| <span class="sourceLineNo">2579</span>}<a name="line.2579"></a> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| </pre> |
| </div> |
| </body> |
| </html> |