| <!DOCTYPE HTML> |
| <html lang="en"> |
| <head> |
| <!-- Encoding declaration comes first so the parser sees it before any other head content. |
|      NOTE(review): assumes the page is saved and served as UTF-8; the listing shown here is plain ASCII. Confirm against the javadoc docencoding setting. --> |
| <meta charset="utf-8"> |
| <!-- Generated by javadoc (17) --> |
| <!-- Page-specific title naming the class whose source is listed, instead of the generic "Source code". --> |
| <title>Source code: org.apache.hadoop.hbase.master.assignment.AssignmentManager</title> |
| <meta name="viewport" content="width=device-width, initial-scale=1"> |
| <meta name="description" content="source: package: org.apache.hadoop.hbase.master.assignment, class: AssignmentManager, class: RegionInTransitionStat"> |
| <meta name="generator" content="javadoc/SourceToHTMLConverter"> |
| <!-- The type attribute is omitted: text/css is the default for rel="stylesheet". --> |
| <link rel="stylesheet" href="../../../../../../../stylesheet.css" title="Style"> |
| </head> |
| <body class="source-page"> |
| <main role="main"> |
| <div class="source-container"> |
| <pre><span class="source-line-no">001</span><span id="line-1">/*</span> |
| <span class="source-line-no">002</span><span id="line-2"> * Licensed to the Apache Software Foundation (ASF) under one</span> |
| <span class="source-line-no">003</span><span id="line-3"> * or more contributor license agreements. See the NOTICE file</span> |
| <span class="source-line-no">004</span><span id="line-4"> * distributed with this work for additional information</span> |
| <span class="source-line-no">005</span><span id="line-5"> * regarding copyright ownership. The ASF licenses this file</span> |
| <span class="source-line-no">006</span><span id="line-6"> * to you under the Apache License, Version 2.0 (the</span> |
| <span class="source-line-no">007</span><span id="line-7"> * "License"); you may not use this file except in compliance</span> |
| <span class="source-line-no">008</span><span id="line-8"> * with the License. You may obtain a copy of the License at</span> |
| <span class="source-line-no">009</span><span id="line-9"> *</span> |
| <span class="source-line-no">010</span><span id="line-10"> * http://www.apache.org/licenses/LICENSE-2.0</span> |
| <span class="source-line-no">011</span><span id="line-11"> *</span> |
| <span class="source-line-no">012</span><span id="line-12"> * Unless required by applicable law or agreed to in writing, software</span> |
| <span class="source-line-no">013</span><span id="line-13"> * distributed under the License is distributed on an "AS IS" BASIS,</span> |
| <span class="source-line-no">014</span><span id="line-14"> * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span> |
| <span class="source-line-no">015</span><span id="line-15"> * See the License for the specific language governing permissions and</span> |
| <span class="source-line-no">016</span><span id="line-16"> * limitations under the License.</span> |
| <span class="source-line-no">017</span><span id="line-17"> */</span> |
| <span class="source-line-no">018</span><span id="line-18">package org.apache.hadoop.hbase.master.assignment;</span> |
| <span class="source-line-no">019</span><span id="line-19"></span> |
| <span class="source-line-no">020</span><span id="line-20">import edu.umd.cs.findbugs.annotations.NonNull;</span> |
| <span class="source-line-no">021</span><span id="line-21">import java.io.IOException;</span> |
| <span class="source-line-no">022</span><span id="line-22">import java.util.ArrayList;</span> |
| <span class="source-line-no">023</span><span id="line-23">import java.util.Collection;</span> |
| <span class="source-line-no">024</span><span id="line-24">import java.util.Collections;</span> |
| <span class="source-line-no">025</span><span id="line-25">import java.util.HashMap;</span> |
| <span class="source-line-no">026</span><span id="line-26">import java.util.HashSet;</span> |
| <span class="source-line-no">027</span><span id="line-27">import java.util.List;</span> |
| <span class="source-line-no">028</span><span id="line-28">import java.util.Map;</span> |
| <span class="source-line-no">029</span><span id="line-29">import java.util.Set;</span> |
| <span class="source-line-no">030</span><span id="line-30">import java.util.concurrent.CompletableFuture;</span> |
| <span class="source-line-no">031</span><span id="line-31">import java.util.concurrent.Future;</span> |
| <span class="source-line-no">032</span><span id="line-32">import java.util.concurrent.TimeUnit;</span> |
| <span class="source-line-no">033</span><span id="line-33">import java.util.concurrent.atomic.AtomicBoolean;</span> |
| <span class="source-line-no">034</span><span id="line-34">import java.util.concurrent.locks.Condition;</span> |
| <span class="source-line-no">035</span><span id="line-35">import java.util.concurrent.locks.ReentrantLock;</span> |
| <span class="source-line-no">036</span><span id="line-36">import java.util.function.Consumer;</span> |
| <span class="source-line-no">037</span><span id="line-37">import java.util.function.Function;</span> |
| <span class="source-line-no">038</span><span id="line-38">import java.util.stream.Collectors;</span> |
| <span class="source-line-no">039</span><span id="line-39">import java.util.stream.Stream;</span> |
| <span class="source-line-no">040</span><span id="line-40">import org.apache.hadoop.conf.Configuration;</span> |
| <span class="source-line-no">041</span><span id="line-41">import org.apache.hadoop.hbase.CatalogFamilyFormat;</span> |
| <span class="source-line-no">042</span><span id="line-42">import org.apache.hadoop.hbase.DoNotRetryIOException;</span> |
| <span class="source-line-no">043</span><span id="line-43">import org.apache.hadoop.hbase.HBaseIOException;</span> |
| <span class="source-line-no">044</span><span id="line-44">import org.apache.hadoop.hbase.HConstants;</span> |
| <span class="source-line-no">045</span><span id="line-45">import org.apache.hadoop.hbase.PleaseHoldException;</span> |
| <span class="source-line-no">046</span><span id="line-46">import org.apache.hadoop.hbase.ServerName;</span> |
| <span class="source-line-no">047</span><span id="line-47">import org.apache.hadoop.hbase.TableName;</span> |
| <span class="source-line-no">048</span><span id="line-48">import org.apache.hadoop.hbase.UnknownRegionException;</span> |
| <span class="source-line-no">049</span><span id="line-49">import org.apache.hadoop.hbase.client.DoNotRetryRegionException;</span> |
| <span class="source-line-no">050</span><span id="line-50">import org.apache.hadoop.hbase.client.MasterSwitchType;</span> |
| <span class="source-line-no">051</span><span id="line-51">import org.apache.hadoop.hbase.client.RegionInfo;</span> |
| <span class="source-line-no">052</span><span id="line-52">import org.apache.hadoop.hbase.client.RegionInfoBuilder;</span> |
| <span class="source-line-no">053</span><span id="line-53">import org.apache.hadoop.hbase.client.RegionReplicaUtil;</span> |
| <span class="source-line-no">054</span><span id="line-54">import org.apache.hadoop.hbase.client.RegionStatesCount;</span> |
| <span class="source-line-no">055</span><span id="line-55">import org.apache.hadoop.hbase.client.Result;</span> |
| <span class="source-line-no">056</span><span id="line-56">import org.apache.hadoop.hbase.client.ResultScanner;</span> |
| <span class="source-line-no">057</span><span id="line-57">import org.apache.hadoop.hbase.client.Scan;</span> |
| <span class="source-line-no">058</span><span id="line-58">import org.apache.hadoop.hbase.client.TableDescriptor;</span> |
| <span class="source-line-no">059</span><span id="line-59">import org.apache.hadoop.hbase.client.TableState;</span> |
| <span class="source-line-no">060</span><span id="line-60">import org.apache.hadoop.hbase.exceptions.UnexpectedStateException;</span> |
| <span class="source-line-no">061</span><span id="line-61">import org.apache.hadoop.hbase.favored.FavoredNodesManager;</span> |
| <span class="source-line-no">062</span><span id="line-62">import org.apache.hadoop.hbase.favored.FavoredNodesPromoter;</span> |
| <span class="source-line-no">063</span><span id="line-63">import org.apache.hadoop.hbase.master.LoadBalancer;</span> |
| <span class="source-line-no">064</span><span id="line-64">import org.apache.hadoop.hbase.master.MasterServices;</span> |
| <span class="source-line-no">065</span><span id="line-65">import org.apache.hadoop.hbase.master.MetricsAssignmentManager;</span> |
| <span class="source-line-no">066</span><span id="line-66">import org.apache.hadoop.hbase.master.RegionPlan;</span> |
| <span class="source-line-no">067</span><span id="line-67">import org.apache.hadoop.hbase.master.RegionState;</span> |
| <span class="source-line-no">068</span><span id="line-68">import org.apache.hadoop.hbase.master.RegionState.State;</span> |
| <span class="source-line-no">069</span><span id="line-69">import org.apache.hadoop.hbase.master.ServerManager;</span> |
| <span class="source-line-no">070</span><span id="line-70">import org.apache.hadoop.hbase.master.TableStateManager;</span> |
| <span class="source-line-no">071</span><span id="line-71">import org.apache.hadoop.hbase.master.balancer.FavoredStochasticBalancer;</span> |
| <span class="source-line-no">072</span><span id="line-72">import org.apache.hadoop.hbase.master.procedure.HBCKServerCrashProcedure;</span> |
| <span class="source-line-no">073</span><span id="line-73">import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;</span> |
| <span class="source-line-no">074</span><span id="line-74">import org.apache.hadoop.hbase.master.procedure.MasterProcedureScheduler;</span> |
| <span class="source-line-no">075</span><span id="line-75">import org.apache.hadoop.hbase.master.procedure.ProcedureSyncWait;</span> |
| <span class="source-line-no">076</span><span id="line-76">import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;</span> |
| <span class="source-line-no">077</span><span id="line-77">import org.apache.hadoop.hbase.master.procedure.TruncateRegionProcedure;</span> |
| <span class="source-line-no">078</span><span id="line-78">import org.apache.hadoop.hbase.master.region.MasterRegion;</span> |
| <span class="source-line-no">079</span><span id="line-79">import org.apache.hadoop.hbase.procedure2.Procedure;</span> |
| <span class="source-line-no">080</span><span id="line-80">import org.apache.hadoop.hbase.procedure2.ProcedureEvent;</span> |
| <span class="source-line-no">081</span><span id="line-81">import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;</span> |
| <span class="source-line-no">082</span><span id="line-82">import org.apache.hadoop.hbase.procedure2.ProcedureInMemoryChore;</span> |
| <span class="source-line-no">083</span><span id="line-83">import org.apache.hadoop.hbase.procedure2.util.StringUtils;</span> |
| <span class="source-line-no">084</span><span id="line-84">import org.apache.hadoop.hbase.regionserver.SequenceId;</span> |
| <span class="source-line-no">085</span><span id="line-85">import org.apache.hadoop.hbase.rsgroup.RSGroupBasedLoadBalancer;</span> |
| <span class="source-line-no">086</span><span id="line-86">import org.apache.hadoop.hbase.util.Bytes;</span> |
| <span class="source-line-no">087</span><span id="line-87">import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;</span> |
| <span class="source-line-no">088</span><span id="line-88">import org.apache.hadoop.hbase.util.FutureUtils;</span> |
| <span class="source-line-no">089</span><span id="line-89">import org.apache.hadoop.hbase.util.Pair;</span> |
| <span class="source-line-no">090</span><span id="line-90">import org.apache.hadoop.hbase.util.Threads;</span> |
| <span class="source-line-no">091</span><span id="line-91">import org.apache.hadoop.hbase.util.VersionInfo;</span> |
| <span class="source-line-no">092</span><span id="line-92">import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;</span> |
| <span class="source-line-no">093</span><span id="line-93">import org.apache.hadoop.hbase.zookeeper.ZKWatcher;</span> |
| <span class="source-line-no">094</span><span id="line-94">import org.apache.yetus.audience.InterfaceAudience;</span> |
| <span class="source-line-no">095</span><span id="line-95">import org.apache.zookeeper.KeeperException;</span> |
| <span class="source-line-no">096</span><span id="line-96">import org.slf4j.Logger;</span> |
| <span class="source-line-no">097</span><span id="line-97">import org.slf4j.LoggerFactory;</span> |
| <span class="source-line-no">098</span><span id="line-98"></span> |
| <span class="source-line-no">099</span><span id="line-99">import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;</span> |
| <span class="source-line-no">100</span><span id="line-100">import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition;</span> |
| <span class="source-line-no">101</span><span id="line-101">import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;</span> |
| <span class="source-line-no">102</span><span id="line-102">import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest;</span> |
| <span class="source-line-no">103</span><span id="line-103">import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse;</span> |
| <span class="source-line-no">104</span><span id="line-104"></span> |
| <span class="source-line-no">105</span><span id="line-105">/**</span> |
| <span class="source-line-no">106</span><span id="line-106"> * The AssignmentManager is the coordinator for region assign/unassign operations.</span> |
| <span class="source-line-no">107</span><span id="line-107"> * &lt;ul&gt;</span> |
| <span class="source-line-no">108</span><span id="line-108"> * &lt;li&gt;In-memory states of regions and servers are stored in {@link RegionStates}.&lt;/li&gt;</span> |
| <span class="source-line-no">109</span><span id="line-109"> * &lt;li&gt;hbase:meta state updates are handled by {@link RegionStateStore}.&lt;/li&gt;</span> |
| <span class="source-line-no">110</span><span id="line-110"> * &lt;/ul&gt;</span> |
| <span class="source-line-no">111</span><span id="line-111"> * Regions are created by CreateTable, Split, Merge. Regions are deleted by DeleteTable, Split,</span> |
| <span class="source-line-no">112</span><span id="line-112"> * Merge. Assigns are triggered by CreateTable, EnableTable, Split, Merge, ServerCrash. Unassigns</span> |
| <span class="source-line-no">113</span><span id="line-113"> * are triggered by DisableTable, Split, Merge</span> |
| <span class="source-line-no">114</span><span id="line-114"> */</span> |
| <span class="source-line-no">115</span><span id="line-115">@InterfaceAudience.Private</span> |
| <span class="source-line-no">116</span><span id="line-116">public class AssignmentManager {</span> |
| <span class="source-line-no">117</span><span id="line-117"> private static final Logger LOG = LoggerFactory.getLogger(AssignmentManager.class);</span> |
| <span class="source-line-no">118</span><span id="line-118"></span> |
| <span class="source-line-no">119</span><span id="line-119"> // TODO: AMv2</span> |
| <span class="source-line-no">120</span><span id="line-120"> // - handle region migration from hbase1 to hbase2.</span> |
| <span class="source-line-no">121</span><span id="line-121"> // - handle sys table assignment first (e.g. acl, namespace)</span> |
| <span class="source-line-no">122</span><span id="line-122"> // - handle table priorities</span> |
| <span class="source-line-no">123</span><span id="line-123"> // - If ServerBusyException trying to update hbase:meta, we abort the Master</span> |
| <span class="source-line-no">124</span><span id="line-124"> // See updateRegionLocation in RegionStateStore.</span> |
| <span class="source-line-no">125</span><span id="line-125"> //</span> |
| <span class="source-line-no">126</span><span id="line-126"> // See also</span> |
| <span class="source-line-no">127</span><span id="line-127"> // https://docs.google.com/document/d/1eVKa7FHdeoJ1-9o8yZcOTAQbv0u0bblBlCCzVSIn69g/edit#heading=h.ystjyrkbtoq5</span> |
| <span class="source-line-no">128</span><span id="line-128"> // for other TODOs.</span> |
| <span class="source-line-no">129</span><span id="line-129"></span> |
| <span class="source-line-no">130</span><span id="line-130"> public static final String BOOTSTRAP_THREAD_POOL_SIZE_CONF_KEY =</span> |
| <span class="source-line-no">131</span><span id="line-131"> "hbase.assignment.bootstrap.thread.pool.size";</span> |
| <span class="source-line-no">132</span><span id="line-132"></span> |
| <span class="source-line-no">133</span><span id="line-133"> public static final String ASSIGN_DISPATCH_WAIT_MSEC_CONF_KEY =</span> |
| <span class="source-line-no">134</span><span id="line-134"> "hbase.assignment.dispatch.wait.msec";</span> |
| <span class="source-line-no">135</span><span id="line-135"> private static final int DEFAULT_ASSIGN_DISPATCH_WAIT_MSEC = 150;</span> |
| <span class="source-line-no">136</span><span id="line-136"></span> |
| <span class="source-line-no">137</span><span id="line-137"> public static final String ASSIGN_DISPATCH_WAITQ_MAX_CONF_KEY =</span> |
| <span class="source-line-no">138</span><span id="line-138"> "hbase.assignment.dispatch.wait.queue.max.size";</span> |
| <span class="source-line-no">139</span><span id="line-139"> private static final int DEFAULT_ASSIGN_DISPATCH_WAITQ_MAX = 100;</span> |
| <span class="source-line-no">140</span><span id="line-140"></span> |
| <span class="source-line-no">141</span><span id="line-141"> public static final String RIT_CHORE_INTERVAL_MSEC_CONF_KEY =</span> |
| <span class="source-line-no">142</span><span id="line-142"> "hbase.assignment.rit.chore.interval.msec";</span> |
| <span class="source-line-no">143</span><span id="line-143"> private static final int DEFAULT_RIT_CHORE_INTERVAL_MSEC = 60 * 1000;</span> |
| <span class="source-line-no">144</span><span id="line-144"></span> |
| <span class="source-line-no">145</span><span id="line-145"> public static final String DEAD_REGION_METRIC_CHORE_INTERVAL_MSEC_CONF_KEY =</span> |
| <span class="source-line-no">146</span><span id="line-146"> "hbase.assignment.dead.region.metric.chore.interval.msec";</span> |
| <span class="source-line-no">147</span><span id="line-147"> private static final int DEFAULT_DEAD_REGION_METRIC_CHORE_INTERVAL_MSEC = 120 * 1000;</span> |
| <span class="source-line-no">148</span><span id="line-148"></span> |
| <span class="source-line-no">149</span><span id="line-149"> public static final String ASSIGN_MAX_ATTEMPTS = "hbase.assignment.maximum.attempts";</span> |
| <span class="source-line-no">150</span><span id="line-150"> private static final int DEFAULT_ASSIGN_MAX_ATTEMPTS = Integer.MAX_VALUE;</span> |
| <span class="source-line-no">151</span><span id="line-151"></span> |
| <span class="source-line-no">152</span><span id="line-152"> public static final String ASSIGN_RETRY_IMMEDIATELY_MAX_ATTEMPTS =</span> |
| <span class="source-line-no">153</span><span id="line-153"> "hbase.assignment.retry.immediately.maximum.attempts";</span> |
| <span class="source-line-no">154</span><span id="line-154"> private static final int DEFAULT_ASSIGN_RETRY_IMMEDIATELY_MAX_ATTEMPTS = 3;</span> |
| <span class="source-line-no">155</span><span id="line-155"></span> |
| <span class="source-line-no">156</span><span id="line-156"> /** Region in Transition metrics threshold time */</span> |
| <span class="source-line-no">157</span><span id="line-157"> public static final String METRICS_RIT_STUCK_WARNING_THRESHOLD =</span> |
| <span class="source-line-no">158</span><span id="line-158"> "hbase.metrics.rit.stuck.warning.threshold";</span> |
| <span class="source-line-no">159</span><span id="line-159"> private static final int DEFAULT_RIT_STUCK_WARNING_THRESHOLD = 60 * 1000;</span> |
| <span class="source-line-no">160</span><span id="line-160"> public static final String UNEXPECTED_STATE_REGION = "Unexpected state for ";</span> |
| <span class="source-line-no">161</span><span id="line-161"></span> |
| <span class="source-line-no">162</span><span id="line-162"> public static final String FORCE_REGION_RETAINMENT = "hbase.master.scp.retain.assignment.force";</span> |
| <span class="source-line-no">163</span><span id="line-163"></span> |
| <span class="source-line-no">164</span><span id="line-164"> public static final boolean DEFAULT_FORCE_REGION_RETAINMENT = false;</span> |
| <span class="source-line-no">165</span><span id="line-165"></span> |
| <span class="source-line-no">166</span><span id="line-166"> /** The wait time in millis before checking again if the region's previous RS is back online */</span> |
| <span class="source-line-no">167</span><span id="line-167"> public static final String FORCE_REGION_RETAINMENT_WAIT_INTERVAL =</span> |
| <span class="source-line-no">168</span><span id="line-168"> "hbase.master.scp.retain.assignment.force.wait-interval";</span> |
| <span class="source-line-no">169</span><span id="line-169"></span> |
| <span class="source-line-no">170</span><span id="line-170"> public static final long DEFAULT_FORCE_REGION_RETAINMENT_WAIT_INTERVAL = 50;</span> |
| <span class="source-line-no">171</span><span id="line-171"></span> |
| <span class="source-line-no">172</span><span id="line-172"> /**</span> |
| <span class="source-line-no">173</span><span id="line-173"> * The number of times to check if the region's previous RS is back online, before giving up and</span> |
| <span class="source-line-no">174</span><span id="line-174"> * proceeding with assignment on a new RS</span> |
| <span class="source-line-no">175</span><span id="line-175"> */</span> |
| <span class="source-line-no">176</span><span id="line-176"> public static final String FORCE_REGION_RETAINMENT_RETRIES =</span> |
| <span class="source-line-no">177</span><span id="line-177"> "hbase.master.scp.retain.assignment.force.retries";</span> |
| <span class="source-line-no">178</span><span id="line-178"></span> |
| <span class="source-line-no">179</span><span id="line-179"> public static final int DEFAULT_FORCE_REGION_RETAINMENT_RETRIES = 600;</span> |
| <span class="source-line-no">180</span><span id="line-180"></span> |
| <span class="source-line-no">181</span><span id="line-181"> private final ProcedureEvent&lt;?&gt; metaAssignEvent = new ProcedureEvent&lt;&gt;("meta assign");</span> |
| <span class="source-line-no">182</span><span id="line-182"> private final ProcedureEvent&lt;?&gt; metaLoadEvent = new ProcedureEvent&lt;&gt;("meta load");</span> |
| <span class="source-line-no">183</span><span id="line-183"></span> |
| <span class="source-line-no">184</span><span id="line-184"> private final MetricsAssignmentManager metrics;</span> |
| <span class="source-line-no">185</span><span id="line-185"> private final RegionInTransitionChore ritChore;</span> |
| <span class="source-line-no">186</span><span id="line-186"> private final DeadServerMetricRegionChore deadMetricChore;</span> |
| <span class="source-line-no">187</span><span id="line-187"> private final MasterServices master;</span> |
| <span class="source-line-no">188</span><span id="line-188"></span> |
| <span class="source-line-no">189</span><span id="line-189"> private final AtomicBoolean running = new AtomicBoolean(false);</span> |
| <span class="source-line-no">190</span><span id="line-190"> private final RegionStates regionStates = new RegionStates();</span> |
| <span class="source-line-no">191</span><span id="line-191"> private final RegionStateStore regionStateStore;</span> |
| <span class="source-line-no">192</span><span id="line-192"></span> |
| <span class="source-line-no">193</span><span id="line-193"> /**</span> |
| <span class="source-line-no">194</span><span id="line-194"> * When the operator uses this configuration option, any version between the current cluster</span> |
| <span class="source-line-no">195</span><span id="line-195"> * version and the value of "hbase.min.version.move.system.tables" does not trigger any</span> |
| <span class="source-line-no">196</span><span id="line-196"> * auto-region movement. Auto-region movement here refers to auto-migration of system table</span> |
| <span class="source-line-no">197</span><span id="line-197"> * regions to newer server versions. It is assumed that the configured range of versions does not</span> |
| <span class="source-line-no">198</span><span id="line-198"> * require special handling of moving system table regions to higher versioned RegionServer. This</span> |
| <span class="source-line-no">199</span><span id="line-199"> * auto-migration is done by {@link #checkIfShouldMoveSystemRegionAsync()}. Example: Let's assume</span> |
| <span class="source-line-no">200</span><span id="line-200"> * the cluster is on version 1.4.0 and we have set "hbase.min.version.move.system.tables" as</span> |
| <span class="source-line-no">201</span><span id="line-201"> * "2.0.0". Now if we upgrade one RegionServer on 1.4.0 cluster to 1.6.0 (&lt; 2.0.0), then</span> |
| <span class="source-line-no">202</span><span id="line-202"> * AssignmentManager will not move hbase:meta, hbase:namespace and other system table regions to</span> |
| <span class="source-line-no">203</span><span id="line-203"> * newly brought up RegionServer 1.6.0 as part of auto-migration. However, if we upgrade one</span> |
| <span class="source-line-no">204</span><span id="line-204"> * RegionServer on 1.4.0 cluster to 2.2.0 (> 2.0.0), then AssignmentManager will move all system</span> |
| <span class="source-line-no">205</span><span id="line-205"> * table regions to newly brought up RegionServer 2.2.0 as part of auto-migration done by</span> |
| <span class="source-line-no">206</span><span id="line-206"> * {@link #checkIfShouldMoveSystemRegionAsync()}. "hbase.min.version.move.system.tables" is</span> |
| <span class="source-line-no">207</span><span id="line-207"> * introduced as part of HBASE-22923.</span> |
| <span class="source-line-no">208</span><span id="line-208"> */</span> |
| <span class="source-line-no">209</span><span id="line-209"> private final String minVersionToMoveSysTables;</span> |
| <span class="source-line-no">210</span><span id="line-210"></span> |
| <span class="source-line-no">211</span><span id="line-211"> private static final String MIN_VERSION_MOVE_SYS_TABLES_CONFIG =</span> |
| <span class="source-line-no">212</span><span id="line-212"> "hbase.min.version.move.system.tables";</span> |
| <span class="source-line-no">213</span><span id="line-213"> private static final String DEFAULT_MIN_VERSION_MOVE_SYS_TABLES_CONFIG = "";</span> |
| <span class="source-line-no">214</span><span id="line-214"></span> |
| <span class="source-line-no">215</span><span id="line-215"> private final Map&lt;ServerName, Set&lt;byte[]&gt;&gt; rsReports = new HashMap&lt;&gt;();</span> |
| <span class="source-line-no">216</span><span id="line-216"></span> |
| <span class="source-line-no">217</span><span id="line-217"> private final boolean shouldAssignRegionsWithFavoredNodes;</span> |
| <span class="source-line-no">218</span><span id="line-218"> private final int assignDispatchWaitQueueMaxSize;</span> |
| <span class="source-line-no">219</span><span id="line-219"> private final int assignDispatchWaitMillis;</span> |
| <span class="source-line-no">220</span><span id="line-220"> private final int assignMaxAttempts;</span> |
| <span class="source-line-no">221</span><span id="line-221"> private final int assignRetryImmediatelyMaxAttempts;</span> |
| <span class="source-line-no">222</span><span id="line-222"></span> |
| <span class="source-line-no">223</span><span id="line-223"> private final MasterRegion masterRegion;</span> |
| <span class="source-line-no">224</span><span id="line-224"></span> |
| <span class="source-line-no">225</span><span id="line-225"> private final Object checkIfShouldMoveSystemRegionLock = new Object();</span> |
| <span class="source-line-no">226</span><span id="line-226"></span> |
| <span class="source-line-no">227</span><span id="line-227"> private Thread assignThread;</span> |
| <span class="source-line-no">228</span><span id="line-228"></span> |
| <span class="source-line-no">229</span><span id="line-229"> private final boolean forceRegionRetainment;</span> |
| <span class="source-line-no">230</span><span id="line-230"></span> |
| <span class="source-line-no">231</span><span id="line-231"> private final long forceRegionRetainmentWaitInterval;</span> |
| <span class="source-line-no">232</span><span id="line-232"></span> |
| <span class="source-line-no">233</span><span id="line-233"> private final int forceRegionRetainmentRetries;</span> |
| <span class="source-line-no">234</span><span id="line-234"></span> |
| <span class="source-line-no">235</span><span id="line-235"> public AssignmentManager(MasterServices master, MasterRegion masterRegion) {</span> |
| <span class="source-line-no">236</span><span id="line-236"> this(master, masterRegion, new RegionStateStore(master, masterRegion));</span> |
| <span class="source-line-no">237</span><span id="line-237"> }</span> |
| <span class="source-line-no">238</span><span id="line-238"></span> |
| <span class="source-line-no">239</span><span id="line-239"> AssignmentManager(MasterServices master, MasterRegion masterRegion, RegionStateStore stateStore) {</span> |
| <span class="source-line-no">240</span><span id="line-240"> this.master = master;</span> |
| <span class="source-line-no">241</span><span id="line-241"> this.regionStateStore = stateStore;</span> |
| <span class="source-line-no">242</span><span id="line-242"> this.metrics = new MetricsAssignmentManager();</span> |
| <span class="source-line-no">243</span><span id="line-243"> this.masterRegion = masterRegion;</span> |
| <span class="source-line-no">244</span><span id="line-244"></span> |
| <span class="source-line-no">245</span><span id="line-245"> final Configuration conf = master.getConfiguration();</span> |
| <span class="source-line-no">246</span><span id="line-246"></span> |
| <span class="source-line-no">247</span><span id="line-247"> // Only read favored nodes if using the favored nodes load balancer.</span> |
| <span class="source-line-no">248</span><span id="line-248"> this.shouldAssignRegionsWithFavoredNodes = FavoredStochasticBalancer.class</span> |
| <span class="source-line-no">249</span><span id="line-249"> .isAssignableFrom(conf.getClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS, Object.class));</span> |
| <span class="source-line-no">250</span><span id="line-250"></span> |
| <span class="source-line-no">251</span><span id="line-251"> this.assignDispatchWaitMillis =</span> |
| <span class="source-line-no">252</span><span id="line-252"> conf.getInt(ASSIGN_DISPATCH_WAIT_MSEC_CONF_KEY, DEFAULT_ASSIGN_DISPATCH_WAIT_MSEC);</span> |
| <span class="source-line-no">253</span><span id="line-253"> this.assignDispatchWaitQueueMaxSize =</span> |
| <span class="source-line-no">254</span><span id="line-254"> conf.getInt(ASSIGN_DISPATCH_WAITQ_MAX_CONF_KEY, DEFAULT_ASSIGN_DISPATCH_WAITQ_MAX);</span> |
| <span class="source-line-no">255</span><span id="line-255"></span> |
| <span class="source-line-no">256</span><span id="line-256"> this.assignMaxAttempts =</span> |
| <span class="source-line-no">257</span><span id="line-257"> Math.max(1, conf.getInt(ASSIGN_MAX_ATTEMPTS, DEFAULT_ASSIGN_MAX_ATTEMPTS));</span> |
| <span class="source-line-no">258</span><span id="line-258"> this.assignRetryImmediatelyMaxAttempts = conf.getInt(ASSIGN_RETRY_IMMEDIATELY_MAX_ATTEMPTS,</span> |
| <span class="source-line-no">259</span><span id="line-259"> DEFAULT_ASSIGN_RETRY_IMMEDIATELY_MAX_ATTEMPTS);</span> |
| <span class="source-line-no">260</span><span id="line-260"></span> |
| <span class="source-line-no">261</span><span id="line-261"> int ritChoreInterval =</span> |
| <span class="source-line-no">262</span><span id="line-262"> conf.getInt(RIT_CHORE_INTERVAL_MSEC_CONF_KEY, DEFAULT_RIT_CHORE_INTERVAL_MSEC);</span> |
| <span class="source-line-no">263</span><span id="line-263"> this.ritChore = new RegionInTransitionChore(ritChoreInterval);</span> |
| <span class="source-line-no">264</span><span id="line-264"></span> |
| <span class="source-line-no">265</span><span id="line-265"> int deadRegionChoreInterval = conf.getInt(DEAD_REGION_METRIC_CHORE_INTERVAL_MSEC_CONF_KEY,</span> |
| <span class="source-line-no">266</span><span id="line-266"> DEFAULT_DEAD_REGION_METRIC_CHORE_INTERVAL_MSEC);</span> |
| <span class="source-line-no">267</span><span id="line-267"> if (deadRegionChoreInterval > 0) {</span> |
| <span class="source-line-no">268</span><span id="line-268"> this.deadMetricChore = new DeadServerMetricRegionChore(deadRegionChoreInterval);</span> |
| <span class="source-line-no">269</span><span id="line-269"> } else {</span> |
| <span class="source-line-no">270</span><span id="line-270"> this.deadMetricChore = null;</span> |
| <span class="source-line-no">271</span><span id="line-271"> }</span> |
| <span class="source-line-no">272</span><span id="line-272"> minVersionToMoveSysTables =</span> |
| <span class="source-line-no">273</span><span id="line-273"> conf.get(MIN_VERSION_MOVE_SYS_TABLES_CONFIG, DEFAULT_MIN_VERSION_MOVE_SYS_TABLES_CONFIG);</span> |
| <span class="source-line-no">274</span><span id="line-274"></span> |
| <span class="source-line-no">275</span><span id="line-275"> forceRegionRetainment =</span> |
| <span class="source-line-no">276</span><span id="line-276"> conf.getBoolean(FORCE_REGION_RETAINMENT, DEFAULT_FORCE_REGION_RETAINMENT);</span> |
| <span class="source-line-no">277</span><span id="line-277"> forceRegionRetainmentWaitInterval = conf.getLong(FORCE_REGION_RETAINMENT_WAIT_INTERVAL,</span> |
| <span class="source-line-no">278</span><span id="line-278"> DEFAULT_FORCE_REGION_RETAINMENT_WAIT_INTERVAL);</span> |
| <span class="source-line-no">279</span><span id="line-279"> forceRegionRetainmentRetries =</span> |
| <span class="source-line-no">280</span><span id="line-280"> conf.getInt(FORCE_REGION_RETAINMENT_RETRIES, DEFAULT_FORCE_REGION_RETAINMENT_RETRIES);</span> |
| <span class="source-line-no">281</span><span id="line-281"> }</span> |
| <span class="source-line-no">282</span><span id="line-282"></span> |
| <span class="source-line-no">283</span><span id="line-283"> private void mirrorMetaLocations() throws IOException, KeeperException {</span> |
| <span class="source-line-no">284</span><span id="line-284"> // For compatibility, mirror the meta region state to ZooKeeper.</span> |
| <span class="source-line-no">285</span><span id="line-285"> // ZooKeeper is also still needed to publish the meta region locations to the region</span> |
| <span class="source-line-no">286</span><span id="line-286"> // servers, so that they can serve as ClientMetaService.</span> |
| <span class="source-line-no">287</span><span id="line-287"> ZKWatcher zk = master.getZooKeeper();</span> |
| <span class="source-line-no">288</span><span id="line-288"> if (zk == null || !zk.getRecoverableZooKeeper().getState().isAlive()) {</span> |
| <span class="source-line-no">289</span><span id="line-289"> // This is possible in tests: no zk watcher was provided, or the zk watcher has been closed.</span> |
| <span class="source-line-no">290</span><span id="line-290"> return;</span> |
| <span class="source-line-no">291</span><span id="line-291"> }</span> |
| <span class="source-line-no">292</span><span id="line-292"> Collection<RegionStateNode> metaStates = regionStates.getRegionStateNodes();</span> |
| <span class="source-line-no">293</span><span id="line-293"> for (RegionStateNode metaState : metaStates) {</span> |
| <span class="source-line-no">294</span><span id="line-294"> MetaTableLocator.setMetaLocation(zk, metaState.getRegionLocation(),</span> |
| <span class="source-line-no">295</span><span id="line-295"> metaState.getRegionInfo().getReplicaId(), metaState.getState());</span> |
| <span class="source-line-no">296</span><span id="line-296"> }</span> |
| <span class="source-line-no">297</span><span id="line-297"> int replicaCount = metaStates.size();</span> |
| <span class="source-line-no">298</span><span id="line-298"> // Remove extra mirror locations, i.e. znodes whose replica id is >= the number of nodes above.</span> |
| <span class="source-line-no">299</span><span id="line-299"> for (String znode : zk.getMetaReplicaNodes()) {</span> |
| <span class="source-line-no">300</span><span id="line-300"> int replicaId = zk.getZNodePaths().getMetaReplicaIdFromZNode(znode);</span> |
| <span class="source-line-no">301</span><span id="line-301"> if (replicaId >= replicaCount) {</span> |
| <span class="source-line-no">302</span><span id="line-302"> MetaTableLocator.deleteMetaLocation(zk, replicaId);</span> |
| <span class="source-line-no">303</span><span id="line-303"> }</span> |
| <span class="source-line-no">304</span><span id="line-304"> }</span> |
| <span class="source-line-no">305</span><span id="line-305"> }</span> |
| <span class="source-line-no">306</span><span id="line-306"></span> |
| <span class="source-line-no">307</span><span id="line-307"> public void start() throws IOException, KeeperException {</span> |
| <span class="source-line-no">308</span><span id="line-308"> if (!running.compareAndSet(false, true)) {</span> |
| <span class="source-line-no">309</span><span id="line-309"> return;</span> |
| <span class="source-line-no">310</span><span id="line-310"> }</span> |
| <span class="source-line-no">311</span><span id="line-311"></span> |
| <span class="source-line-no">312</span><span id="line-312"> LOG.trace("Starting assignment manager");</span> |
| <span class="source-line-no">313</span><span id="line-313"></span> |
| <span class="source-line-no">314</span><span id="line-314"> // Start the Assignment Thread</span> |
| <span class="source-line-no">315</span><span id="line-315"> startAssignmentThread();</span> |
| <span class="source-line-no">316</span><span id="line-316"> // Load the meta region states.</span> |
| <span class="source-line-no">317</span><span id="line-317"> // We are still in the early steps of active master startup, where only one thread (us) can</span> |
| <span class="source-line-no">318</span><span id="line-318"> // access the AssignmentManager and create region nodes, so there is no need to lock the</span> |
| <span class="source-line-no">319</span><span id="line-319"> // region node here.</span> |
| <span class="source-line-no">320</span><span id="line-320"> try (ResultScanner scanner =</span> |
| <span class="source-line-no">321</span><span id="line-321"> masterRegion.getScanner(new Scan().addFamily(HConstants.CATALOG_FAMILY))) {</span> |
| <span class="source-line-no">322</span><span id="line-322"> for (;;) {</span> |
| <span class="source-line-no">323</span><span id="line-323"> Result result = scanner.next();</span> |
| <span class="source-line-no">324</span><span id="line-324"> if (result == null) {</span> |
| <span class="source-line-no">325</span><span id="line-325"> break;</span> |
| <span class="source-line-no">326</span><span id="line-326"> }</span> |
| <span class="source-line-no">327</span><span id="line-327"> RegionStateStore</span> |
| <span class="source-line-no">328</span><span id="line-328"> .visitMetaEntry((r, regionInfo, state, regionLocation, lastHost, openSeqNum) -> {</span> |
| <span class="source-line-no">329</span><span id="line-329"> RegionStateNode regionNode = regionStates.getOrCreateRegionStateNode(regionInfo);</span> |
| <span class="source-line-no">330</span><span id="line-330"> regionNode.setState(state);</span> |
| <span class="source-line-no">331</span><span id="line-331"> regionNode.setLastHost(lastHost);</span> |
| <span class="source-line-no">332</span><span id="line-332"> regionNode.setRegionLocation(regionLocation);</span> |
| <span class="source-line-no">333</span><span id="line-333"> regionNode.setOpenSeqNum(openSeqNum);</span> |
| <span class="source-line-no">334</span><span id="line-334"> if (regionNode.getProcedure() != null) {</span> |
| <span class="source-line-no">335</span><span id="line-335"> regionNode.getProcedure().stateLoaded(this, regionNode);</span> |
| <span class="source-line-no">336</span><span id="line-336"> }</span> |
| <span class="source-line-no">337</span><span id="line-337"> if (regionLocation != null) {</span> |
| <span class="source-line-no">338</span><span id="line-338"> // TODO: this could leave some orphan server state nodes behind, as it is possible that</span> |
| <span class="source-line-no">339</span><span id="line-339"> // the region server is already dead and its SCP has already finished while we still</span> |
| <span class="source-line-no">340</span><span id="line-340"> // have an opening state persisted for this region server. Eventually the TRSP will assign</span> |
| <span class="source-line-no">341</span><span id="line-341"> // the region to another region server, so this causes no critical problems; it only wastes</span> |
| <span class="source-line-no">342</span><span id="line-342"> // some memory, as no one will try to clean up these orphan server state nodes.</span> |
| <span class="source-line-no">343</span><span id="line-343"> regionStates.createServer(regionLocation);</span> |
| <span class="source-line-no">344</span><span id="line-344"> regionStates.addRegionToServer(regionNode);</span> |
| <span class="source-line-no">345</span><span id="line-345"> }</span> |
| <span class="source-line-no">346</span><span id="line-346"> if (RegionReplicaUtil.isDefaultReplica(regionInfo.getReplicaId())) {</span> |
| <span class="source-line-no">347</span><span id="line-347"> setMetaAssigned(regionInfo, state == State.OPEN);</span> |
| <span class="source-line-no">348</span><span id="line-348"> }</span> |
| <span class="source-line-no">349</span><span id="line-349"> LOG.debug("Loaded hbase:meta {}", regionNode);</span> |
| <span class="source-line-no">350</span><span id="line-350"> }, result);</span> |
| <span class="source-line-no">351</span><span id="line-351"> }</span> |
| <span class="source-line-no">352</span><span id="line-352"> }</span> |
| <span class="source-line-no">353</span><span id="line-353"> mirrorMetaLocations();</span> |
| <span class="source-line-no">354</span><span id="line-354"> }</span> |
| <span class="source-line-no">355</span><span id="line-355"></span> |
| <span class="source-line-no">356</span><span id="line-356"> /**</span> |
| <span class="source-line-no">357</span><span id="line-357"> * Get or create the RegionStateNode of each TRSP in the list and attach the TRSP to it.</span> |
| <span class="source-line-no">358</span><span id="line-358"> * <p></span> |
| <span class="source-line-no">359</span><span id="line-359"> * This is used to restore the RIT region list, so we do not need to restore it in the loadingMeta</span> |
| <span class="source-line-no">360</span><span id="line-360"> * method below. It is also very important because a TRSP must now be attached to its</span> |
| <span class="source-line-no">361</span><span id="line-361"> * RegionStateNode, which acts like a guard, before it is submitted, so this information has to</span> |
| <span class="source-line-no">362</span><span id="line-362"> * be restored at the very beginning, before we start processing any procedures.</span> |
| <span class="source-line-no">363</span><span id="line-363"> */</span> |
| <span class="source-line-no">364</span><span id="line-364"> public void setupRIT(List<TransitRegionStateProcedure> procs) {</span> |
| <span class="source-line-no">365</span><span id="line-365"> procs.forEach(proc -> {</span> |
| <span class="source-line-no">366</span><span id="line-366"> RegionInfo regionInfo = proc.getRegion();</span> |
| <span class="source-line-no">367</span><span id="line-367"> RegionStateNode regionNode = regionStates.getOrCreateRegionStateNode(regionInfo);</span> |
| <span class="source-line-no">368</span><span id="line-368"> TransitRegionStateProcedure existingProc = regionNode.getProcedure();</span> |
| <span class="source-line-no">369</span><span id="line-369"> if (existingProc != null) {</span> |
| <span class="source-line-no">370</span><span id="line-370"> // This is possible, as we detach the procedure from the RSN before we actually finish</span> |
| <span class="source-line-no">371</span><span id="line-371"> // the procedure. The TRSP is detached from the RSN during execution, and at that time</span> |
| <span class="source-line-no">372</span><span id="line-372"> // the procedure has not been marked as done in the pv2 framework yet, so a new TRSP may</span> |
| <span class="source-line-no">373</span><span id="line-373"> // be scheduled immediately, and when arriving here we will find that there are multiple</span> |
| <span class="source-line-no">374</span><span id="line-374"> // TRSPs for the region. But we can be sure that only the last one is in charge, as the</span> |
| <span class="source-line-no">375</span><span id="line-375"> // previous ones should all have finished already. So here we compare the proc ids and</span> |
| <span class="source-line-no">376</span><span id="line-376"> // the greater one wins.</span> |
| <span class="source-line-no">377</span><span id="line-377"> if (existingProc.getProcId() < proc.getProcId()) {</span> |
| <span class="source-line-no">378</span><span id="line-378"> // the new one wins, unset the existing one and set the new one below</span> |
| <span class="source-line-no">379</span><span id="line-379"> regionNode.unsetProcedure(existingProc);</span> |
| <span class="source-line-no">380</span><span id="line-380"> } else {</span> |
| <span class="source-line-no">381</span><span id="line-381"> // the old one wins, skip</span> |
| <span class="source-line-no">382</span><span id="line-382"> return;</span> |
| <span class="source-line-no">383</span><span id="line-383"> }</span> |
| <span class="source-line-no">384</span><span id="line-384"> }</span> |
| <span class="source-line-no">385</span><span id="line-385"> LOG.info("Attach {} to {} to restore RIT", proc, regionNode);</span> |
| <span class="source-line-no">386</span><span id="line-386"> regionNode.setProcedure(proc);</span> |
| <span class="source-line-no">387</span><span id="line-387"> });</span> |
| <span class="source-line-no">388</span><span id="line-388"> }</span> |
| <span class="source-line-no">389</span><span id="line-389"></span> |
| <span class="source-line-no">390</span><span id="line-390"> public void stop() {</span> |
| <span class="source-line-no">391</span><span id="line-391"> if (!running.compareAndSet(true, false)) {</span> |
| <span class="source-line-no">392</span><span id="line-392"> return;</span> |
| <span class="source-line-no">393</span><span id="line-393"> }</span> |
| <span class="source-line-no">394</span><span id="line-394"></span> |
| <span class="source-line-no">395</span><span id="line-395"> LOG.info("Stopping assignment manager");</span> |
| <span class="source-line-no">396</span><span id="line-396"></span> |
| <span class="source-line-no">397</span><span id="line-397"> // The AM is started before the procedure executor,</span> |
| <span class="source-line-no">398</span><span id="line-398"> // but the actual work is loaded/submitted only once we have the executor.</span> |
| <span class="source-line-no">399</span><span id="line-399"> final boolean hasProcExecutor = master.getMasterProcedureExecutor() != null;</span> |
| <span class="source-line-no">400</span><span id="line-400"></span> |
| <span class="source-line-no">401</span><span id="line-401"> // Remove the RIT chore and, if one was created, the dead server metric chore</span> |
| <span class="source-line-no">402</span><span id="line-402"> if (hasProcExecutor) {</span> |
| <span class="source-line-no">403</span><span id="line-403"> master.getMasterProcedureExecutor().removeChore(this.ritChore);</span> |
| <span class="source-line-no">404</span><span id="line-404"> if (this.deadMetricChore != null) {</span> |
| <span class="source-line-no">405</span><span id="line-405"> master.getMasterProcedureExecutor().removeChore(this.deadMetricChore);</span> |
| <span class="source-line-no">406</span><span id="line-406"> }</span> |
| <span class="source-line-no">407</span><span id="line-407"> }</span> |
| <span class="source-line-no">408</span><span id="line-408"></span> |
| <span class="source-line-no">409</span><span id="line-409"> // Stop the Assignment Thread</span> |
| <span class="source-line-no">410</span><span id="line-410"> stopAssignmentThread();</span> |
| <span class="source-line-no">411</span><span id="line-411"></span> |
| <span class="source-line-no">412</span><span id="line-412"> // Clear the region states tracked by regionStates</span> |
| <span class="source-line-no">413</span><span id="line-413"> regionStates.clear();</span> |
| <span class="source-line-no">414</span><span id="line-414"></span> |
| <span class="source-line-no">415</span><span id="line-415"> // Update meta events (for testing)</span> |
| <span class="source-line-no">416</span><span id="line-416"> if (hasProcExecutor) {</span> |
| <span class="source-line-no">417</span><span id="line-417"> metaLoadEvent.suspend();</span> |
| <span class="source-line-no">418</span><span id="line-418"> for (RegionInfo hri : getMetaRegionSet()) {</span> |
| <span class="source-line-no">419</span><span id="line-419"> setMetaAssigned(hri, false);</span> |
| <span class="source-line-no">420</span><span id="line-420"> }</span> |
| <span class="source-line-no">421</span><span id="line-421"> }</span> |
| <span class="source-line-no">422</span><span id="line-422"> }</span> |
| <span class="source-line-no">423</span><span id="line-423"></span> |
| <span class="source-line-no">424</span><span id="line-424"> public boolean isRunning() {</span> |
| <span class="source-line-no">425</span><span id="line-425"> return running.get();</span> |
| <span class="source-line-no">426</span><span id="line-426"> }</span> |
| <span class="source-line-no">427</span><span id="line-427"></span> |
| <span class="source-line-no">428</span><span id="line-428"> public Configuration getConfiguration() {</span> |
| <span class="source-line-no">429</span><span id="line-429"> return master.getConfiguration();</span> |
| <span class="source-line-no">430</span><span id="line-430"> }</span> |
| <span class="source-line-no">431</span><span id="line-431"></span> |
| <span class="source-line-no">432</span><span id="line-432"> public MetricsAssignmentManager getAssignmentManagerMetrics() {</span> |
| <span class="source-line-no">433</span><span id="line-433"> return metrics;</span> |
| <span class="source-line-no">434</span><span id="line-434"> }</span> |
| <span class="source-line-no">435</span><span id="line-435"></span> |
| <span class="source-line-no">436</span><span id="line-436"> private LoadBalancer getBalancer() {</span> |
| <span class="source-line-no">437</span><span id="line-437"> return master.getLoadBalancer();</span> |
| <span class="source-line-no">438</span><span id="line-438"> }</span> |
| <span class="source-line-no">439</span><span id="line-439"></span> |
| <span class="source-line-no">440</span><span id="line-440"> private FavoredNodesPromoter getFavoredNodePromoter() {</span> |
| <span class="source-line-no">441</span><span id="line-441"> return (FavoredNodesPromoter) ((RSGroupBasedLoadBalancer) master.getLoadBalancer())</span> |
| <span class="source-line-no">442</span><span id="line-442"> .getInternalBalancer();</span> |
| <span class="source-line-no">443</span><span id="line-443"> }</span> |
| <span class="source-line-no">444</span><span id="line-444"></span> |
| <span class="source-line-no">445</span><span id="line-445"> private MasterProcedureEnv getProcedureEnvironment() {</span> |
| <span class="source-line-no">446</span><span id="line-446"> return master.getMasterProcedureExecutor().getEnvironment();</span> |
| <span class="source-line-no">447</span><span id="line-447"> }</span> |
| <span class="source-line-no">448</span><span id="line-448"></span> |
| <span class="source-line-no">449</span><span id="line-449"> private MasterProcedureScheduler getProcedureScheduler() {</span> |
| <span class="source-line-no">450</span><span id="line-450"> return getProcedureEnvironment().getProcedureScheduler();</span> |
| <span class="source-line-no">451</span><span id="line-451"> }</span> |
| <span class="source-line-no">452</span><span id="line-452"></span> |
| <span class="source-line-no">453</span><span id="line-453"> int getAssignMaxAttempts() {</span> |
| <span class="source-line-no">454</span><span id="line-454"> return assignMaxAttempts;</span> |
| <span class="source-line-no">455</span><span id="line-455"> }</span> |
| <span class="source-line-no">456</span><span id="line-456"></span> |
| <span class="source-line-no">457</span><span id="line-457"> public boolean isForceRegionRetainment() {</span> |
| <span class="source-line-no">458</span><span id="line-458"> return forceRegionRetainment;</span> |
| <span class="source-line-no">459</span><span id="line-459"> }</span> |
| <span class="source-line-no">460</span><span id="line-460"></span> |
| <span class="source-line-no">461</span><span id="line-461"> public long getForceRegionRetainmentWaitInterval() {</span> |
| <span class="source-line-no">462</span><span id="line-462"> return forceRegionRetainmentWaitInterval;</span> |
| <span class="source-line-no">463</span><span id="line-463"> }</span> |
| <span class="source-line-no">464</span><span id="line-464"></span> |
| <span class="source-line-no">465</span><span id="line-465"> public int getForceRegionRetainmentRetries() {</span> |
| <span class="source-line-no">466</span><span id="line-466"> return forceRegionRetainmentRetries;</span> |
| <span class="source-line-no">467</span><span id="line-467"> }</span> |
| <span class="source-line-no">468</span><span id="line-468"></span> |
| <span class="source-line-no">469</span><span id="line-469"> int getAssignRetryImmediatelyMaxAttempts() {</span> |
| <span class="source-line-no">470</span><span id="line-470"> return assignRetryImmediatelyMaxAttempts;</span> |
| <span class="source-line-no">471</span><span id="line-471"> }</span> |
| <span class="source-line-no">472</span><span id="line-472"></span> |
| <span class="source-line-no">473</span><span id="line-473"> public RegionStates getRegionStates() {</span> |
| <span class="source-line-no">474</span><span id="line-474"> return regionStates;</span> |
| <span class="source-line-no">475</span><span id="line-475"> }</span> |
| <span class="source-line-no">476</span><span id="line-476"></span> |
| <span class="source-line-no">477</span><span id="line-477"> /**</span> |
| <span class="source-line-no">478</span><span id="line-478"> * Returns the regions hosted by the specified server, or an empty list if it has no server node.</span> |
| <span class="source-line-no">479</span><span id="line-479"> * <p></span> |
| <span class="source-line-no">480</span><span id="line-480"> * Note that for SCP, once the SCP has been submitted, no one can change the region list of the</span> |
| <span class="source-line-no">481</span><span id="line-481"> * ServerStateNode, so we do not need any locks here. For other usages, this only gives you a</span> |
| <span class="source-line-no">482</span><span id="line-482"> * snapshot of the current region list for this server: right after you get the region list,</span> |
| <span class="source-line-no">483</span><span id="line-483"> * new regions may be moved to this server or some regions may be moved off this server, so</span> |
| <span class="source-line-no">484</span><span id="line-484"> * you should not rely on it if you need strong consistency.</span> |
| <span class="source-line-no">485</span><span id="line-485"> */</span> |
| <span class="source-line-no">486</span><span id="line-486"> public List<RegionInfo> getRegionsOnServer(ServerName serverName) {</span> |
| <span class="source-line-no">487</span><span id="line-487"> ServerStateNode serverInfo = regionStates.getServerNode(serverName);</span> |
| <span class="source-line-no">488</span><span id="line-488"> if (serverInfo == null) {</span> |
| <span class="source-line-no">489</span><span id="line-489"> return Collections.emptyList();</span> |
| <span class="source-line-no">490</span><span id="line-490"> }</span> |
| <span class="source-line-no">491</span><span id="line-491"> return serverInfo.getRegionInfoList();</span> |
| <span class="source-line-no">492</span><span id="line-492"> }</span> |
| <span class="source-line-no">493</span><span id="line-493"></span> |
| <span class="source-line-no">494</span><span id="line-494"> private RegionInfo getRegionInfo(RegionStateNode rsn) {</span> |
| <span class="source-line-no">495</span><span id="line-495"> if (rsn.isSplit() && !rsn.getRegionInfo().isSplit()) {</span> |
| <span class="source-line-no">496</span><span id="line-496"> // See the comments in markRegionAsSplit for why this conversion is needed.</span> |
| <span class="source-line-no">497</span><span id="line-497"> return RegionInfoBuilder.newBuilder(rsn.getRegionInfo()).setSplit(true).setOffline(true)</span> |
| <span class="source-line-no">498</span><span id="line-498"> .build();</span> |
| <span class="source-line-no">499</span><span id="line-499"> } else {</span> |
| <span class="source-line-no">500</span><span id="line-500"> return rsn.getRegionInfo();</span> |
| <span class="source-line-no">501</span><span id="line-501"> }</span> |
| <span class="source-line-no">502</span><span id="line-502"> }</span> |
| <span class="source-line-no">503</span><span id="line-503"></span> |
| <span class="source-line-no">504</span><span id="line-504"> private Stream<RegionStateNode> getRegionStateNodes(TableName tableName,</span> |
| <span class="source-line-no">505</span><span id="line-505"> boolean excludeOfflinedSplitParents) {</span> |
| <span class="source-line-no">506</span><span id="line-506"> Stream<RegionStateNode> stream = regionStates.getTableRegionStateNodes(tableName).stream();</span> |
| <span class="source-line-no">507</span><span id="line-507"> if (excludeOfflinedSplitParents) {</span> |
| <span class="source-line-no">508</span><span id="line-508"> return stream.filter(rsn -> !rsn.isSplit());</span> |
| <span class="source-line-no">509</span><span id="line-509"> } else {</span> |
| <span class="source-line-no">510</span><span id="line-510"> return stream;</span> |
| <span class="source-line-no">511</span><span id="line-511"> }</span> |
| <span class="source-line-no">512</span><span id="line-512"> }</span> |
| <span class="source-line-no">513</span><span id="line-513"></span> |
| <span class="source-line-no">514</span><span id="line-514"> public List<RegionInfo> getTableRegions(TableName tableName,</span> |
| <span class="source-line-no">515</span><span id="line-515"> boolean excludeOfflinedSplitParents) {</span> |
| <span class="source-line-no">516</span><span id="line-516"> return getRegionStateNodes(tableName, excludeOfflinedSplitParents).map(this::getRegionInfo)</span> |
| <span class="source-line-no">517</span><span id="line-517"> .collect(Collectors.toList());</span> |
| <span class="source-line-no">518</span><span id="line-518"> }</span> |
| <span class="source-line-no">519</span><span id="line-519"></span> |
| <span class="source-line-no">520</span><span id="line-520"> public List<Pair<RegionInfo, ServerName>> getTableRegionsAndLocations(TableName tableName,</span> |
| <span class="source-line-no">521</span><span id="line-521"> boolean excludeOfflinedSplitParents) {</span> |
| <span class="source-line-no">522</span><span id="line-522"> return getRegionStateNodes(tableName, excludeOfflinedSplitParents)</span> |
| <span class="source-line-no">523</span><span id="line-523"> .map(rsn -> Pair.newPair(getRegionInfo(rsn), rsn.getRegionLocation()))</span> |
| <span class="source-line-no">524</span><span id="line-524"> .collect(Collectors.toList());</span> |
| <span class="source-line-no">525</span><span id="line-525"> }</span> |
| <span class="source-line-no">526</span><span id="line-526"></span> |
| <span class="source-line-no">527</span><span id="line-527"> public RegionStateStore getRegionStateStore() {</span> |
| <span class="source-line-no">528</span><span id="line-528"> return regionStateStore;</span> |
| <span class="source-line-no">529</span><span id="line-529"> }</span> |
| <span class="source-line-no">530</span><span id="line-530"></span> |
| <span class="source-line-no">531</span><span id="line-531"> public List<ServerName> getFavoredNodes(final RegionInfo regionInfo) {</span> |
| <span class="source-line-no">532</span><span id="line-532"> return this.shouldAssignRegionsWithFavoredNodes</span> |
| <span class="source-line-no">533</span><span id="line-533"> ? getFavoredNodePromoter().getFavoredNodes(regionInfo)</span> |
| <span class="source-line-no">534</span><span id="line-534"> : ServerName.EMPTY_SERVER_LIST;</span> |
| <span class="source-line-no">535</span><span id="line-535"> }</span> |
| <span class="source-line-no">536</span><span id="line-536"></span> |
| <span class="source-line-no">537</span><span id="line-537"> // ============================================================================================</span> |
| <span class="source-line-no">538</span><span id="line-538"> // Table State Manager helpers</span> |
| <span class="source-line-no">539</span><span id="line-539"> // ============================================================================================</span> |
| <span class="source-line-no">540</span><span id="line-540"> private TableStateManager getTableStateManager() {</span> |
| <span class="source-line-no">541</span><span id="line-541"> return master.getTableStateManager();</span> |
| <span class="source-line-no">542</span><span id="line-542"> }</span> |
| <span class="source-line-no">543</span><span id="line-543"></span> |
| <span class="source-line-no">544</span><span id="line-544"> private boolean isTableEnabled(final TableName tableName) {</span> |
| <span class="source-line-no">545</span><span id="line-545"> return getTableStateManager().isTableState(tableName, TableState.State.ENABLED);</span> |
| <span class="source-line-no">546</span><span id="line-546"> }</span> |
| <span class="source-line-no">547</span><span id="line-547"></span> |
| <span class="source-line-no">548</span><span id="line-548"> private boolean isTableDisabled(final TableName tableName) {</span> |
| <span class="source-line-no">549</span><span id="line-549"> return getTableStateManager().isTableState(tableName, TableState.State.DISABLED,</span> |
| <span class="source-line-no">550</span><span id="line-550"> TableState.State.DISABLING);</span> |
| <span class="source-line-no">551</span><span id="line-551"> }</span> |
| <span class="source-line-no">552</span><span id="line-552"></span> |
| <span class="source-line-no">553</span><span id="line-553"> // ============================================================================================</span> |
| <span class="source-line-no">554</span><span id="line-554"> // META Helpers</span> |
| <span class="source-line-no">555</span><span id="line-555"> // ============================================================================================</span> |
| <span class="source-line-no">556</span><span id="line-556"> private boolean isMetaRegion(final RegionInfo regionInfo) {</span> |
| <span class="source-line-no">557</span><span id="line-557"> return regionInfo.isMetaRegion();</span> |
| <span class="source-line-no">558</span><span id="line-558"> }</span> |
| <span class="source-line-no">559</span><span id="line-559"></span> |
| <span class="source-line-no">560</span><span id="line-560"> public boolean isMetaRegion(final byte[] regionName) {</span> |
| <span class="source-line-no">561</span><span id="line-561"> return getMetaRegionFromName(regionName) != null;</span> |
| <span class="source-line-no">562</span><span id="line-562"> }</span> |
| <span class="source-line-no">563</span><span id="line-563"></span> |
| <span class="source-line-no">564</span><span id="line-564"> public RegionInfo getMetaRegionFromName(final byte[] regionName) {</span> |
| <span class="source-line-no">565</span><span id="line-565"> // Scan the known meta regions for one whose full region name equals the given bytes;</span> |
| <span class="source-line-no">566</span><span id="line-566"> // the result is null when no meta region carries that name.</span> |
| <span class="source-line-no">567</span><span id="line-567"> return getMetaRegionSet().stream()</span> |
| <span class="source-line-no">568</span><span id="line-568"> .filter(metaRegion -> Bytes.equals(metaRegion.getRegionName(), regionName))</span> |
| <span class="source-line-no">569</span><span id="line-569"> .findFirst()</span> |
| <span class="source-line-no">570</span><span id="line-570"> .orElse(null);</span> |
| <span class="source-line-no">571</span><span id="line-571"> }</span> |
| <span class="source-line-no">572</span><span id="line-572"></span> |
| <span class="source-line-no">573</span><span id="line-573"> public boolean isCarryingMeta(final ServerName serverName) {</span> |
| <span class="source-line-no">574</span><span id="line-574"> // TODO: handle multiple meta; until then only FIRST_META_REGIONINFO is checked.</span> |
| <span class="source-line-no">575</span><span id="line-575"> return isCarryingRegion(serverName, RegionInfoBuilder.FIRST_META_REGIONINFO);</span> |
| <span class="source-line-no">576</span><span id="line-576"> }</span> |
| <span class="source-line-no">577</span><span id="line-577"></span> |
| <span class="source-line-no">578</span><span id="line-578"> private boolean isCarryingRegion(final ServerName serverName, final RegionInfo regionInfo) {</span> |
| <span class="source-line-no">579</span><span id="line-579"> // TODO: check for state? For now only the location recorded on the region's node is compared.</span> |
| <span class="source-line-no">580</span><span id="line-580"> final RegionStateNode stateNode = regionStates.getRegionStateNode(regionInfo);</span> |
| <span class="source-line-no">581</span><span id="line-581"> return stateNode != null && serverName.equals(stateNode.getRegionLocation());</span> |
| <span class="source-line-no">582</span><span id="line-582"> }</span> |
| <span class="source-line-no">583</span><span id="line-583"></span> |
| <span class="source-line-no">584</span><span id="line-584"> private RegionInfo getMetaForRegion(final RegionInfo regionInfo) {</span> |
| <span class="source-line-no">585</span><span id="line-585"> // The argument is ignored for now: every region maps to the single first meta region.</span> |
| <span class="source-line-no">586</span><span id="line-586"> // TODO: handle multiple meta. If the region provided is itself meta, return it as is;</span> |
| <span class="source-line-no">587</span><span id="line-587"> // otherwise look up which meta the region belongs to.</span> |
| <span class="source-line-no">588</span><span id="line-588"> return RegionInfoBuilder.FIRST_META_REGIONINFO;</span> |
| <span class="source-line-no">589</span><span id="line-589"> }</span> |
| <span class="source-line-no">590</span><span id="line-590"></span> |
| <span class="source-line-no">591</span><span id="line-591"> // TODO: handle multiple meta. Until then this set holds only FIRST_META_REGIONINFO.</span> |
| <span class="source-line-no">592</span><span id="line-592"> private static final Set<RegionInfo> META_REGION_SET =</span> |
| <span class="source-line-no">593</span><span id="line-593"> Collections.singleton(RegionInfoBuilder.FIRST_META_REGIONINFO);</span> |
| <span class="source-line-no">594</span><span id="line-594"></span> |
| <span class="source-line-no">595</span><span id="line-595"> public Set<RegionInfo> getMetaRegionSet() {</span> |
| <span class="source-line-no">596</span><span id="line-596"> return META_REGION_SET; /* the shared static set itself, not a copy */</span> |
| <span class="source-line-no">597</span><span id="line-597"> }</span> |
| <span class="source-line-no">598</span><span id="line-598"></span> |
| <span class="source-line-no">599</span><span id="line-599"> // ============================================================================================</span> |
| <span class="source-line-no">600</span><span id="line-600"> // META Event(s) helpers</span> |
| <span class="source-line-no">601</span><span id="line-601"> // ============================================================================================</span> |
| <span class="source-line-no">602</span><span id="line-602"> /**</span> |
| <span class="source-line-no">603</span><span id="line-603"> * Return whether the meta assign event is ready. Note that this only means the meta region is</span> |
| <span class="source-line-no">604</span><span id="line-604"> * available on a RS; the AM may still be loading the region states from meta, so usually you need</span> |
| <span class="source-line-no">605</span><span id="line-605"> * to check {@link #isMetaLoaded()} before this method, unless you can make sure that your piece</span> |
| <span class="source-line-no">606</span><span id="line-606"> * of code can only be executed after AM builds the region states.</span> |
| <span class="source-line-no">607</span><span id="line-607"> * @see #isMetaLoaded()</span> |
| <span class="source-line-no">608</span><span id="line-608"> */</span> |
| <span class="source-line-no">609</span><span id="line-609"> public boolean isMetaAssigned() {</span> |
| <span class="source-line-no">610</span><span id="line-610"> return metaAssignEvent.isReady();</span> |
| <span class="source-line-no">611</span><span id="line-611"> }</span> |
| <span class="source-line-no">612</span><span id="line-612"></span> |
| <span class="source-line-no">613</span><span id="line-613"> public boolean isMetaRegionInTransition() {</span> |
| <span class="source-line-no">614</span><span id="line-614"> return !isMetaAssigned(); /* derived from the meta assign event only; no region state is read */</span> |
| <span class="source-line-no">615</span><span id="line-615"> }</span> |
| <span class="source-line-no">616</span><span id="line-616"></span> |
| <span class="source-line-no">617</span><span id="line-617"> /**</span> |
| <span class="source-line-no">618</span><span id="line-618"> * Calls {@code suspendIfNotReady(proc)} on the meta assign event. That event being ready does</span> |
| <span class="source-line-no">619</span><span id="line-619"> * not mean the AM has finished region state rebuilding; see {@link #isMetaAssigned()}.</span> |
| <span class="source-line-no">620</span><span id="line-620"> * @see #isMetaAssigned()</span> |
| <span class="source-line-no">621</span><span id="line-621"> */</span> |
| <span class="source-line-no">622</span><span id="line-622"> public boolean waitMetaAssigned(Procedure<?> proc, RegionInfo regionInfo) {</span> |
| <span class="source-line-no">623</span><span id="line-623"> return getMetaAssignEvent(getMetaForRegion(regionInfo)).suspendIfNotReady(proc);</span> |
| <span class="source-line-no">624</span><span id="line-624"> }</span> |
| <span class="source-line-no">625</span><span id="line-625"></span> |
| <span class="source-line-no">626</span><span id="line-626"> private void setMetaAssigned(RegionInfo metaRegionInfo, boolean assigned) {</span> |
| <span class="source-line-no">627</span><span id="line-627"> assert isMetaRegion(metaRegionInfo) : "unexpected non-meta region " + metaRegionInfo;</span> |
| <span class="source-line-no">628</span><span id="line-628"> ProcedureEvent<?> metaAssignEvent = getMetaAssignEvent(metaRegionInfo);</span> |
| <span class="source-line-no">629</span><span id="line-629"> if (assigned) {</span> |
| <span class="source-line-no">630</span><span id="line-630"> metaAssignEvent.wake(getProcedureScheduler());</span> |
| <span class="source-line-no">631</span><span id="line-631"> } else {</span> |
| <span class="source-line-no">632</span><span id="line-632"> metaAssignEvent.suspend();</span> |
| <span class="source-line-no">633</span><span id="line-633"> }</span> |
| <span class="source-line-no">634</span><span id="line-634"> }</span> |
| <span class="source-line-no">635</span><span id="line-635"></span> |
| <span class="source-line-no">636</span><span id="line-636"> private ProcedureEvent<?> getMetaAssignEvent(RegionInfo metaRegionInfo) {</span> |
| <span class="source-line-no">637</span><span id="line-637"> assert isMetaRegion(metaRegionInfo) : "unexpected non-meta region " + metaRegionInfo;</span> |
| <span class="source-line-no">638</span><span id="line-638"> // TODO: handle multiple meta. One event is shared, so the argument is only sanity-checked.</span> |
| <span class="source-line-no">639</span><span id="line-639"> return metaAssignEvent;</span> |
| <span class="source-line-no">640</span><span id="line-640"> }</span> |
| <span class="source-line-no">641</span><span id="line-641"></span> |
| <span class="source-line-no">642</span><span id="line-642"> /**</span> |
| <span class="source-line-no">643</span><span id="line-643"> * Make {@code proc} wait until AM finishes the meta loading, i.e., the region states rebuilding.</span> |
| <span class="source-line-no">644</span><span id="line-644"> * @see #isMetaLoaded()</span> |
| <span class="source-line-no">645</span><span id="line-645"> * @see #waitMetaAssigned(Procedure, RegionInfo)</span> |
| <span class="source-line-no">646</span><span id="line-646"> */</span> |
| <span class="source-line-no">647</span><span id="line-647"> public boolean waitMetaLoaded(Procedure<?> proc) {</span> |
| <span class="source-line-no">648</span><span id="line-648"> return metaLoadEvent.suspendIfNotReady(proc);</span> |
| <span class="source-line-no">649</span><span id="line-649"> }</span> |
| <span class="source-line-no">650</span><span id="line-650"></span> |
| <span class="source-line-no">651</span><span id="line-651"> /**</span> |
| <span class="source-line-no">652</span><span id="line-652"> * This method will be called in master initialization method after calling</span> |
| <span class="source-line-no">653</span><span id="line-653"> * {@link #processOfflineRegions()}, as in processOfflineRegions we will generate assign</span> |
| <span class="source-line-no">654</span><span id="line-654"> * procedures for offline regions, which may conflict with creating table.</span> |
| <span class="source-line-no">655</span><span id="line-655"> * <p></span> |
| <span class="source-line-no">656</span><span id="line-656"> * This is a bit dirty, should be reconsidered after we decide whether to keep the</span> |
| <span class="source-line-no">657</span><span id="line-657"> * {@link #processOfflineRegions()} method.</span> |
| <span class="source-line-no">658</span><span id="line-658"> */</span> |
| <span class="source-line-no">659</span><span id="line-659"> public void wakeMetaLoadedEvent() {</span> |
| <span class="source-line-no">660</span><span id="line-660"> metaLoadEvent.wake(getProcedureScheduler());</span> |
| <span class="source-line-no">661</span><span id="line-661"> assert isMetaLoaded() : "expected meta to be loaded";</span> |
| <span class="source-line-no">662</span><span id="line-662"> }</span> |
| <span class="source-line-no">663</span><span id="line-663"></span> |
| <span class="source-line-no">664</span><span id="line-664"> /**</span> |
| <span class="source-line-no">665</span><span id="line-665"> * Return whether AM has finished the meta loading, i.e., the region states rebuilding.</span> |
| <span class="source-line-no">666</span><span id="line-666"> * @see #isMetaAssigned()</span> |
| <span class="source-line-no">667</span><span id="line-667"> * @see #waitMetaLoaded(Procedure)</span> |
| <span class="source-line-no">668</span><span id="line-668"> */</span> |
| <span class="source-line-no">669</span><span id="line-669"> public boolean isMetaLoaded() {</span> |
| <span class="source-line-no">670</span><span id="line-670"> return metaLoadEvent.isReady();</span> |
| <span class="source-line-no">671</span><span id="line-671"> }</span> |
| <span class="source-line-no">672</span><span id="line-672"></span> |
| <span class="source-line-no">673</span><span id="line-673"> /**</span> |
| <span class="source-line-no">674</span><span id="line-674"> * Start a new thread to check if there are region servers whose versions are higher than others.</span> |
| <span class="source-line-no">675</span><span id="line-675"> * If so, move all system table regions to RS with the highest version to keep compatibility. The</span> |
| <span class="source-line-no">676</span><span id="line-676"> * reason is, RS in new version may not be able to access RS in old version when there are some</span> |
| <span class="source-line-no">677</span><span id="line-677"> * incompatible changes.</span> |
| <span class="source-line-no">678</span><span id="line-678"> * <p></span> |
| <span class="source-line-no">679</span><span id="line-679"> * This method is called when a new RegionServer is added to cluster only.</span> |
| <span class="source-line-no">680</span><span id="line-680"> * </p></span> |
| <span class="source-line-no">681</span><span id="line-681"> */</span> |
| <span class="source-line-no">682</span><span id="line-682"> public void checkIfShouldMoveSystemRegionAsync() {</span> |
| <span class="source-line-no">683</span><span id="line-683"> // TODO: Fix this thread. If a server is killed and a new one started, this thread thinks that</span> |
| <span class="source-line-no">684</span><span id="line-684"> // it should 'move' the system tables from the old server to the new server but</span> |
| <span class="source-line-no">685</span><span id="line-685"> // ServerCrashProcedure is on it; and it will take care of the assign without dataloss.</span> |
| <span class="source-line-no">686</span><span id="line-686"> if (this.master.getServerManager().countOfRegionServers() <= 1) {</span> |
| <span class="source-line-no">687</span><span id="line-687"> return;</span> |
| <span class="source-line-no">688</span><span id="line-688"> }</span> |
| <span class="source-line-no">689</span><span id="line-689"> // This thread used to run whenever there was a change in the cluster. The ZooKeeper</span> |
| <span class="source-line-no">690</span><span id="line-690"> // childrenChanged notification came in before the nodeDeleted message and so this method</span> |
| <span class="source-line-no">691</span><span id="line-691"> // could run before a ServerCrashProcedure could run. That meant that this thread could see</span> |
| <span class="source-line-no">692</span><span id="line-692"> // a Crashed Server before ServerCrashProcedure and it could find system regions on the</span> |
| <span class="source-line-no">693</span><span id="line-693"> // crashed server and go move them before ServerCrashProcedure had a chance; could be</span> |
| <span class="source-line-no">694</span><span id="line-694"> // dataloss too if WALs were not recovered.</span> |
| <span class="source-line-no">695</span><span id="line-695"> new Thread(() -> {</span> |
| <span class="source-line-no">696</span><span id="line-696"> try {</span> |
| <span class="source-line-no">697</span><span id="line-697"> synchronized (checkIfShouldMoveSystemRegionLock) {</span> |
| <span class="source-line-no">698</span><span id="line-698"> // TODO: I don't think this code does a good job if all servers in cluster have same</span> |
| <span class="source-line-no">699</span><span id="line-699"> // version. It looks like it will schedule unnecessary moves.</span> |
| <span class="source-line-no">700</span><span id="line-700"> for (ServerName server : getExcludedServersForSystemTable()) {</span> |
| <span class="source-line-no">701</span><span id="line-701"> if (master.getServerManager().isServerDead(server)) {</span> |
| <span class="source-line-no">702</span><span id="line-702"> // TODO: See HBASE-18494 and HBASE-18495. Though getExcludedServersForSystemTable()</span> |
| <span class="source-line-no">703</span><span id="line-703"> // considers only online servers, the server could be queued for dead server</span> |
| <span class="source-line-no">704</span><span id="line-704"> // processing. As region assignments for crashed server is handled by</span> |
| <span class="source-line-no">705</span><span id="line-705"> // ServerCrashProcedure, do NOT handle them here. The goal is to handle this through</span> |
| <span class="source-line-no">706</span><span id="line-706"> // regular flow of LoadBalancer as a favored node and not to have this special</span> |
| <span class="source-line-no">707</span><span id="line-707"> // handling.</span> |
| <span class="source-line-no">708</span><span id="line-708"> continue;</span> |
| <span class="source-line-no">709</span><span id="line-709"> }</span> |
| <span class="source-line-no">710</span><span id="line-710"> // Plans are collected per server: a list shared across iterations would re-submit the</span> |
| <span class="source-line-no">711</span><span id="line-711"> // moves already scheduled for earlier servers and fail on "currently in transition".</span> |
| <span class="source-line-no">712</span><span id="line-712"> List<RegionPlan> plans = new ArrayList<>();</span> |
| <span class="source-line-no">713</span><span id="line-713"> for (RegionInfo regionInfo : getSystemTables(server)) {</span> |
| <span class="source-line-no">714</span><span id="line-714"> // null value for dest forces destination server to be selected by balancer</span> |
| <span class="source-line-no">715</span><span id="line-715"> RegionPlan plan = new RegionPlan(regionInfo, server, null);</span> |
| <span class="source-line-no">716</span><span id="line-716"> if (regionInfo.isMetaRegion()) {</span> |
| <span class="source-line-no">717</span><span id="line-717"> // Must move meta region first.</span> |
| <span class="source-line-no">718</span><span id="line-718"> LOG.info("Async MOVE of {} to newer Server={}", regionInfo.getEncodedName(),</span> |
| <span class="source-line-no">719</span><span id="line-719"> server);</span> |
| <span class="source-line-no">720</span><span id="line-720"> moveAsync(plan);</span> |
| <span class="source-line-no">721</span><span id="line-721"> } else {</span> |
| <span class="source-line-no">722</span><span id="line-722"> plans.add(plan);</span> |
| <span class="source-line-no">723</span><span id="line-723"> }</span> |
| <span class="source-line-no">724</span><span id="line-724"> }</span> |
| <span class="source-line-no">725</span><span id="line-725"> // Non-meta system regions of this server go last, after any meta move was submitted.</span> |
| <span class="source-line-no">726</span><span id="line-726"> for (RegionPlan plan : plans) {</span> |
| <span class="source-line-no">727</span><span id="line-727"> LOG.info("Async MOVE of {} to newer Server={}", plan.getRegionInfo().getEncodedName(),</span> |
| <span class="source-line-no">728</span><span id="line-728"> server);</span> |
| <span class="source-line-no">729</span><span id="line-729"> moveAsync(plan);</span> |
| <span class="source-line-no">730</span><span id="line-730"> }</span> |
| <span class="source-line-no">731</span><span id="line-731"> }</span> |
| <span class="source-line-no">732</span><span id="line-732"> }</span> |
| <span class="source-line-no">733</span><span id="line-733"> } catch (Throwable t) {</span> |
| <span class="source-line-no">734</span><span id="line-734"> LOG.error(t.toString(), t);</span> |
| <span class="source-line-no">735</span><span id="line-735"> }</span> |
| <span class="source-line-no">736</span><span id="line-736"> }).start();</span> |
| <span class="source-line-no">737</span><span id="line-737"> }</span> |
| <span class="source-line-no">738</span><span id="line-738"></span> |
| <span class="source-line-no">739</span><span id="line-739"> private List<RegionInfo> getSystemTables(ServerName serverName) {</span> |
| <span class="source-line-no">740</span><span id="line-740"> ServerStateNode serverNode = regionStates.getServerNode(serverName);</span> |
| <span class="source-line-no">741</span><span id="line-741"> if (serverNode == null) {</span> |
| <span class="source-line-no">742</span><span id="line-742"> return Collections.emptyList();</span> |
| <span class="source-line-no">743</span><span id="line-743"> }</span> |
| <span class="source-line-no">744</span><span id="line-744"> return serverNode.getSystemRegionInfoList();</span> |
| <span class="source-line-no">745</span><span id="line-745"> }</span> |
| <span class="source-line-no">746</span><span id="line-746"></span> |
| <span class="source-line-no">747</span><span id="line-747"> private void preTransitCheck(RegionStateNode regionNode, RegionState.State[] expectedStates)</span> |
| <span class="source-line-no">748</span><span id="line-748"> throws HBaseIOException {</span> |
| <span class="source-line-no">749</span><span id="line-749"> if (regionNode.getProcedure() != null) { /* checked first: a procedure is already attached */</span> |
| <span class="source-line-no">750</span><span id="line-750"> throw new HBaseIOException(</span> |
| <span class="source-line-no">751</span><span id="line-751"> regionNode + " is currently in transition; pid=" + regionNode.getProcedure().getProcId());</span> |
| <span class="source-line-no">752</span><span id="line-752"> }</span> |
| <span class="source-line-no">753</span><span id="line-753"> if (!regionNode.isInState(expectedStates)) { /* not in any state the caller allows */</span> |
| <span class="source-line-no">754</span><span id="line-754"> throw new DoNotRetryRegionException(UNEXPECTED_STATE_REGION + regionNode);</span> |
| <span class="source-line-no">755</span><span id="line-755"> }</span> |
| <span class="source-line-no">756</span><span id="line-756"> if (isTableDisabled(regionNode.getTable())) { /* checked last: the region's table is disabled */</span> |
| <span class="source-line-no">757</span><span id="line-757"> throw new DoNotRetryIOException(regionNode.getTable() + " is disabled for " + regionNode);</span> |
| <span class="source-line-no">758</span><span id="line-758"> }</span> |
| <span class="source-line-no">759</span><span id="line-759"> }</span> |
| <span class="source-line-no">760</span><span id="line-760"></span> |
| <span class="source-line-no">761</span><span id="line-761"> /**</span> |
| <span class="source-line-no">762</span><span id="line-762"> * Create an assign TransitRegionStateProcedure. Makes sure of RegionState. Throws exception if</span> |
| <span class="source-line-no">763</span><span id="line-763"> * not appropriate UNLESS both override and force are set. Used by hbck2 but also by straightline</span> |
| <span class="source-line-no">764</span><span id="line-764"> * {@link #assign(RegionInfo, ServerName)} and {@link #assignAsync(RegionInfo, ServerName)}.</span> |
| <span class="source-line-no">765</span><span id="line-765"> * @param override If false, always check RegionState is appropriate for assign; if not throw.</span> |
| <span class="source-line-no">766</span><span id="line-766"> * @param force Only read when override is true; if true, the RegionState check is skipped.</span> |
| <span class="source-line-no">767</span><span id="line-767"> * @see #createAssignProcedure(RegionStateNode, ServerName) for a version that does NO checking</span> |
| <span class="source-line-no">768</span><span id="line-768"> */</span> |
| <span class="source-line-no">769</span><span id="line-769"> private TransitRegionStateProcedure createAssignProcedure(RegionInfo regionInfo, ServerName sn,</span> |
| <span class="source-line-no">770</span><span id="line-770"> boolean override, boolean force) throws IOException {</span> |
| <span class="source-line-no">771</span><span id="line-771"> RegionStateNode regionNode = regionStates.getOrCreateRegionStateNode(regionInfo);</span> |
| <span class="source-line-no">772</span><span id="line-772"> regionNode.lock();</span> |
| <span class="source-line-no">773</span><span id="line-773"> try {</span> |
| <span class="source-line-no">774</span><span id="line-774"> if (override) {</span> |
| <span class="source-line-no">775</span><span id="line-775"> if (!force) {</span> |
| <span class="source-line-no">776</span><span id="line-776"> preTransitCheck(regionNode, STATES_EXPECTED_ON_ASSIGN);</span> |
| <span class="source-line-no">777</span><span id="line-777"> }</span> |
| <span class="source-line-no">778</span><span id="line-778"> if (regionNode.getProcedure() != null) {</span> |
| <span class="source-line-no">779</span><span id="line-779"> regionNode.unsetProcedure(regionNode.getProcedure());</span> |
| <span class="source-line-no">780</span><span id="line-780"> }</span> |
| <span class="source-line-no">781</span><span id="line-781"> } else {</span> |
| <span class="source-line-no">782</span><span id="line-782"> preTransitCheck(regionNode, STATES_EXPECTED_ON_ASSIGN);</span> |
| <span class="source-line-no">783</span><span id="line-783"> }</span> |
| <span class="source-line-no">784</span><span id="line-784"> assert regionNode.getProcedure() == null;</span> |
| <span class="source-line-no">785</span><span id="line-785"> return regionNode.setProcedure(</span> |
| <span class="source-line-no">786</span><span id="line-786"> TransitRegionStateProcedure.assign(getProcedureEnvironment(), regionInfo, sn));</span> |
| <span class="source-line-no">787</span><span id="line-787"> } finally {</span> |
| <span class="source-line-no">788</span><span id="line-788"> regionNode.unlock();</span> |
| <span class="source-line-no">789</span><span id="line-789"> }</span> |
| <span class="source-line-no">790</span><span id="line-790"> }</span> |
| <span class="source-line-no">791</span><span id="line-791"></span> |
| <span class="source-line-no">792</span><span id="line-792"> /**</span> |
| <span class="source-line-no">793</span><span id="line-793"> * Create an assign TransitRegionStateProcedure. Does NO checking of RegionState. Presumes</span> |
| <span class="source-line-no">794</span><span id="line-794"> * appropriate state ripe for assign.</span> |
| <span class="source-line-no">795</span><span id="line-795"> * @see #createAssignProcedure(RegionInfo, ServerName, boolean, boolean)</span> |
| <span class="source-line-no">796</span><span id="line-796"> */</span> |
| <span class="source-line-no">797</span><span id="line-797"> private TransitRegionStateProcedure createAssignProcedure(RegionStateNode regionNode,</span> |
| <span class="source-line-no">798</span><span id="line-798"> ServerName targetServer) {</span> |
| <span class="source-line-no">799</span><span id="line-799"> regionNode.lock();</span> |
| <span class="source-line-no">800</span><span id="line-800"> try {</span> |
| <span class="source-line-no">801</span><span id="line-801"> return regionNode.setProcedure(TransitRegionStateProcedure.assign(getProcedureEnvironment(),</span> |
| <span class="source-line-no">802</span><span id="line-802"> regionNode.getRegionInfo(), targetServer));</span> |
| <span class="source-line-no">803</span><span id="line-803"> } finally {</span> |
| <span class="source-line-no">804</span><span id="line-804"> regionNode.unlock();</span> |
| <span class="source-line-no">805</span><span id="line-805"> }</span> |
| <span class="source-line-no">806</span><span id="line-806"> }</span> |
| <span class="source-line-no">807</span><span id="line-807"></span> |
| <span class="source-line-no">808</span><span id="line-808"> public long assign(RegionInfo regionInfo, ServerName sn) throws IOException {</span> |
| <span class="source-line-no">809</span><span id="line-809"> TransitRegionStateProcedure proc = createAssignProcedure(regionInfo, sn, false, false); /* override=false, force=false */</span> |
| <span class="source-line-no">810</span><span id="line-810"> ProcedureSyncWait.submitAndWaitProcedure(master.getMasterProcedureExecutor(), proc);</span> |
| <span class="source-line-no">811</span><span id="line-811"> return proc.getProcId(); /* id of the assign procedure that was submitted above */</span> |
| <span class="source-line-no">812</span><span id="line-812"> }</span> |
| <span class="source-line-no">813</span><span id="line-813"></span> |
| <span class="source-line-no">814</span><span id="line-814"> public long assign(RegionInfo regionInfo) throws IOException {</span> |
| <span class="source-line-no">815</span><span id="line-815"> return assign(regionInfo, null); /* null: no target server is specified */</span> |
| <span class="source-line-no">816</span><span id="line-816"> }</span> |
| <span class="source-line-no">817</span><span id="line-817"></span> |
| <span class="source-line-no">818</span><span id="line-818"> /**</span> |
| <span class="source-line-no">819</span><span id="line-819"> * Submits a procedure that assigns a region to a target server without waiting for it to finish.</span> |
| <span class="source-line-no">820</span><span id="line-820"> * @param regionInfo the region we would like to assign</span> |
| <span class="source-line-no">821</span><span id="line-821"> * @param sn target server name; may be null, as passed by {@link #assignAsync(RegionInfo)}</span> |
| <span class="source-line-no">822</span><span id="line-822"> */</span> |
| <span class="source-line-no">823</span><span id="line-823"> public Future<byte[]> assignAsync(RegionInfo regionInfo, ServerName sn) throws IOException {</span> |
| <span class="source-line-no">824</span><span id="line-824"> return ProcedureSyncWait.submitProcedure(master.getMasterProcedureExecutor(),</span> |
| <span class="source-line-no">825</span><span id="line-825"> createAssignProcedure(regionInfo, sn, false, false));</span> |
| <span class="source-line-no">826</span><span id="line-826"> }</span> |
| <span class="source-line-no">827</span><span id="line-827"></span> |
| <span class="source-line-no">828</span><span id="line-828"> /**</span> |
| <span class="source-line-no">829</span><span id="line-829"> * Submits a procedure that assigns a region (no target server given) without waiting for it.</span> |
| <span class="source-line-no">830</span><span id="line-830"> * @param regionInfo the region we would like to assign</span> |
| <span class="source-line-no">831</span><span id="line-831"> */</span> |
| <span class="source-line-no">832</span><span id="line-832"> public Future<byte[]> assignAsync(RegionInfo regionInfo) throws IOException {</span> |
| <span class="source-line-no">833</span><span id="line-833"> return assignAsync(regionInfo, null);</span> |
| <span class="source-line-no">834</span><span id="line-834"> }</span> |
| <span class="source-line-no">835</span><span id="line-835"></span> |
| <span class="source-line-no">836</span><span id="line-836"> public long unassign(RegionInfo regionInfo) throws IOException {</span> |
| <span class="source-line-no">837</span><span id="line-837"> final RegionStateNode node = regionStates.getRegionStateNode(regionInfo);</span> |
| <span class="source-line-no">838</span><span id="line-838"> if (node == null) {</span> |
| <span class="source-line-no">839</span><span id="line-839"> throw new UnknownRegionException("No RegionState found for " + regionInfo.getEncodedName());</span> |
| <span class="source-line-no">840</span><span id="line-840"> }</span> |
| <span class="source-line-no">841</span><span id="line-841"> final TransitRegionStateProcedure unassignProc;</span> |
| <span class="source-line-no">842</span><span id="line-842"> node.lock(); // the checks and the procedure attachment happen under the node lock</span> |
| <span class="source-line-no">843</span><span id="line-843"> try {</span> |
| <span class="source-line-no">844</span><span id="line-844"> preTransitCheck(node, STATES_EXPECTED_ON_UNASSIGN_OR_MOVE);</span> |
| <span class="source-line-no">845</span><span id="line-845"> unassignProc = TransitRegionStateProcedure.unassign(getProcedureEnvironment(), regionInfo);</span> |
| <span class="source-line-no">846</span><span id="line-846"> node.setProcedure(unassignProc);</span> |
| <span class="source-line-no">847</span><span id="line-847"> } finally {</span> |
| <span class="source-line-no">848</span><span id="line-848"> node.unlock();</span> |
| <span class="source-line-no">849</span><span id="line-849"> } // the node lock is released before the procedure is submitted</span> |
| <span class="source-line-no">850</span><span id="line-850"> ProcedureSyncWait.submitAndWaitProcedure(master.getMasterProcedureExecutor(), unassignProc);</span> |
| <span class="source-line-no">851</span><span id="line-851"> return unassignProc.getProcId();</span> |
| <span class="source-line-no">852</span><span id="line-852"> }</span> |
| <span class="source-line-no">853</span><span id="line-853"></span> |
| <span class="source-line-no">854</span><span id="line-854"> public TransitRegionStateProcedure createMoveRegionProcedure(RegionInfo regionInfo,</span> |
| <span class="source-line-no">855</span><span id="line-855"> ServerName targetServer) throws HBaseIOException {</span> |
| <span class="source-line-no">856</span><span id="line-856"> final RegionStateNode node = this.regionStates.getRegionStateNode(regionInfo);</span> |
| <span class="source-line-no">857</span><span id="line-857"> if (node == null) {</span> |
| <span class="source-line-no">858</span><span id="line-858"> throw new UnknownRegionException(</span> |
| <span class="source-line-no">859</span><span id="line-859"> "No RegionStateNode found for " + regionInfo.getEncodedName() + "(Closed/Deleted?)");</span> |
| <span class="source-line-no">860</span><span id="line-860"> }</span> |
| <span class="source-line-no">861</span><span id="line-861"> node.lock(); // the checks and the procedure attachment happen under the node lock</span> |
| <span class="source-line-no">862</span><span id="line-862"> try {</span> |
| <span class="source-line-no">863</span><span id="line-863"> preTransitCheck(node, STATES_EXPECTED_ON_UNASSIGN_OR_MOVE);</span> |
| <span class="source-line-no">864</span><span id="line-864"> node.checkOnline();</span> |
| <span class="source-line-no">865</span><span id="line-865"> final TransitRegionStateProcedure moveProc =</span> |
| <span class="source-line-no">866</span><span id="line-866"> TransitRegionStateProcedure.move(getProcedureEnvironment(), regionInfo, targetServer);</span> |
| <span class="source-line-no">867</span><span id="line-867"> node.setProcedure(moveProc);</span> |
| <span class="source-line-no">868</span><span id="line-868"> return moveProc;</span> |
| <span class="source-line-no">869</span><span id="line-869"> } finally {</span> |
| <span class="source-line-no">870</span><span id="line-870"> node.unlock();</span> |
| <span class="source-line-no">871</span><span id="line-871"> }</span> |
| <span class="source-line-no">872</span><span id="line-872"> }</span> |
| <span class="source-line-no">873</span><span id="line-873"></span> |
| <span class="source-line-no">874</span><span id="line-874"> public void move(RegionInfo regionInfo) throws IOException {</span> |
| <span class="source-line-no">875</span><span id="line-875"> TransitRegionStateProcedure proc = createMoveRegionProcedure(regionInfo, null);</span> |
| <span class="source-line-no">876</span><span id="line-876"> ProcedureSyncWait.submitAndWaitProcedure(master.getMasterProcedureExecutor(), proc);</span> |
| <span class="source-line-no">877</span><span id="line-877"> }</span> |
| <span class="source-line-no">878</span><span id="line-878"></span> |
| <span class="source-line-no">879</span><span id="line-879"> public Future<byte[]> moveAsync(RegionPlan regionPlan) throws HBaseIOException {</span> |
| <span class="source-line-no">880</span><span id="line-880"> TransitRegionStateProcedure proc =</span> |
| <span class="source-line-no">881</span><span id="line-881"> createMoveRegionProcedure(regionPlan.getRegionInfo(), regionPlan.getDestination());</span> |
| <span class="source-line-no">882</span><span id="line-882"> return ProcedureSyncWait.submitProcedure(master.getMasterProcedureExecutor(), proc);</span> |
| <span class="source-line-no">883</span><span id="line-883"> }</span> |
| <span class="source-line-no">884</span><span id="line-884"></span> |
| <span class="source-line-no">885</span><span id="line-885"> public Future<byte[]> balance(RegionPlan regionPlan) throws HBaseIOException {</span> |
| <span class="source-line-no">886</span><span id="line-886"> ServerName current =</span> |
| <span class="source-line-no">887</span><span id="line-887"> this.getRegionStates().getRegionAssignments().get(regionPlan.getRegionInfo());</span> |
| <span class="source-line-no">888</span><span id="line-888"> if (current == null || !current.equals(regionPlan.getSource())) {</span> |
| <span class="source-line-no">889</span><span id="line-889"> LOG.debug("Skip region plan {}, source server not match, current region location is {}",</span> |
| <span class="source-line-no">890</span><span id="line-890"> regionPlan, current == null ? "(null)" : current);</span> |
| <span class="source-line-no">891</span><span id="line-891"> return null;</span> |
| <span class="source-line-no">892</span><span id="line-892"> }</span> |
| <span class="source-line-no">893</span><span id="line-893"> return moveAsync(regionPlan);</span> |
| <span class="source-line-no">894</span><span id="line-894"> }</span> |
| <span class="source-line-no">895</span><span id="line-895"></span> |
| <span class="source-line-no">896</span><span id="line-896"> // ============================================================================================</span> |
| <span class="source-line-no">897</span><span id="line-897"> // RegionTransition procedures helpers</span> |
| <span class="source-line-no">898</span><span id="line-898"> // ============================================================================================</span> |
| <span class="source-line-no">899</span><span id="line-899"></span> |
| <span class="source-line-no">900</span><span id="line-900"> /**</span> |
| <span class="source-line-no">901</span><span id="line-901"> * Create round-robin assigns. Use on table creation to distribute out regions across cluster.</span> |
| <span class="source-line-no">902</span><span id="line-902"> * @return AssignProcedures made out of the passed in <code>hris</code> and a call to the balancer</span> |
| <span class="source-line-no">903</span><span id="line-903"> * to populate the assigns with targets chosen using round-robin (default balancer</span> |
| <span class="source-line-no">904</span><span id="line-904"> * scheme). If at assign-time, the target chosen is no longer up, thats fine, the</span> |
| <span class="source-line-no">905</span><span id="line-905"> * AssignProcedure will ask the balancer for a new target, and so on.</span> |
| <span class="source-line-no">906</span><span id="line-906"> */</span> |
| <span class="source-line-no">907</span><span id="line-907"> public TransitRegionStateProcedure[] createRoundRobinAssignProcedures(List<RegionInfo> hris,</span> |
| <span class="source-line-no">908</span><span id="line-908"> List<ServerName> serversToExclude) {</span> |
| <span class="source-line-no">909</span><span id="line-909"> if (hris.isEmpty()) {</span> |
| <span class="source-line-no">910</span><span id="line-910"> return new TransitRegionStateProcedure[0];</span> |
| <span class="source-line-no">911</span><span id="line-911"> }</span> |
| <span class="source-line-no">912</span><span id="line-912"></span> |
| <span class="source-line-no">913</span><span id="line-913"> if (</span> |
| <span class="source-line-no">914</span><span id="line-914"> serversToExclude != null && this.master.getServerManager().getOnlineServersList().size() == 1</span> |
| <span class="source-line-no">915</span><span id="line-915"> ) {</span> |
| <span class="source-line-no">916</span><span id="line-916"> LOG.debug("Only one region server found and hence going ahead with the assignment");</span> |
| <span class="source-line-no">917</span><span id="line-917"> serversToExclude = null;</span> |
| <span class="source-line-no">918</span><span id="line-918"> }</span> |
| <span class="source-line-no">919</span><span id="line-919"> try {</span> |
| <span class="source-line-no">920</span><span id="line-920"> // Ask the balancer to assign our regions. Pass the regions en masse. The balancer can do</span> |
| <span class="source-line-no">921</span><span id="line-921"> // a better job if it has all the assignments in the one lump.</span> |
| <span class="source-line-no">922</span><span id="line-922"> Map<ServerName, List<RegionInfo>> assignments = getBalancer().roundRobinAssignment(hris,</span> |
| <span class="source-line-no">923</span><span id="line-923"> this.master.getServerManager().createDestinationServersList(serversToExclude));</span> |
| <span class="source-line-no">924</span><span id="line-924"> // Return mid-method!</span> |
| <span class="source-line-no">925</span><span id="line-925"> return createAssignProcedures(assignments);</span> |
| <span class="source-line-no">926</span><span id="line-926"> } catch (IOException hioe) {</span> |
| <span class="source-line-no">927</span><span id="line-927"> LOG.warn("Failed roundRobinAssignment", hioe);</span> |
| <span class="source-line-no">928</span><span id="line-928"> }</span> |
| <span class="source-line-no">929</span><span id="line-929"> // If an error above, fall-through to this simpler assign. Last resort.</span> |
| <span class="source-line-no">930</span><span id="line-930"> return createAssignProcedures(hris);</span> |
| <span class="source-line-no">931</span><span id="line-931"> }</span> |
| <span class="source-line-no">932</span><span id="line-932"></span> |
| <span class="source-line-no">933</span><span id="line-933"> /**</span> |
| <span class="source-line-no">934</span><span id="line-934"> * Create round-robin assigns. Use on table creation to distribute out regions across cluster.</span> |
| <span class="source-line-no">935</span><span id="line-935"> * @return AssignProcedures made out of the passed in <code>hris</code> and a call to the balancer</span> |
| <span class="source-line-no">936</span><span id="line-936"> * to populate the assigns with targets chosen using round-robin (default balancer</span> |
| <span class="source-line-no">937</span><span id="line-937"> * scheme). If at assign-time, the target chosen is no longer up, thats fine, the</span> |
| <span class="source-line-no">938</span><span id="line-938"> * AssignProcedure will ask the balancer for a new target, and so on.</span> |
| <span class="source-line-no">939</span><span id="line-939"> */</span> |
| <span class="source-line-no">940</span><span id="line-940"> public TransitRegionStateProcedure[] createRoundRobinAssignProcedures(List<RegionInfo> hris) {</span> |
| <span class="source-line-no">941</span><span id="line-941"> return createRoundRobinAssignProcedures(hris, null);</span> |
| <span class="source-line-no">942</span><span id="line-942"> }</span> |
| <span class="source-line-no">943</span><span id="line-943"></span> |
| <span class="source-line-no">944</span><span id="line-944"> static int compare(TransitRegionStateProcedure left, TransitRegionStateProcedure right) {</span> |
| <span class="source-line-no">945</span><span id="line-945"> if (left.getRegion().isMetaRegion()) {</span> |
| <span class="source-line-no">946</span><span id="line-946"> if (right.getRegion().isMetaRegion()) {</span> |
| <span class="source-line-no">947</span><span id="line-947"> return RegionInfo.COMPARATOR.compare(left.getRegion(), right.getRegion());</span> |
| <span class="source-line-no">948</span><span id="line-948"> }</span> |
| <span class="source-line-no">949</span><span id="line-949"> return -1;</span> |
| <span class="source-line-no">950</span><span id="line-950"> } else if (right.getRegion().isMetaRegion()) {</span> |
| <span class="source-line-no">951</span><span id="line-951"> return +1;</span> |
| <span class="source-line-no">952</span><span id="line-952"> }</span> |
| <span class="source-line-no">953</span><span id="line-953"> if (left.getRegion().getTable().isSystemTable()) {</span> |
| <span class="source-line-no">954</span><span id="line-954"> if (right.getRegion().getTable().isSystemTable()) {</span> |
| <span class="source-line-no">955</span><span id="line-955"> return RegionInfo.COMPARATOR.compare(left.getRegion(), right.getRegion());</span> |
| <span class="source-line-no">956</span><span id="line-956"> }</span> |
| <span class="source-line-no">957</span><span id="line-957"> return -1;</span> |
| <span class="source-line-no">958</span><span id="line-958"> } else if (right.getRegion().getTable().isSystemTable()) {</span> |
| <span class="source-line-no">959</span><span id="line-959"> return +1;</span> |
| <span class="source-line-no">960</span><span id="line-960"> }</span> |
| <span class="source-line-no">961</span><span id="line-961"> return RegionInfo.COMPARATOR.compare(left.getRegion(), right.getRegion());</span> |
| <span class="source-line-no">962</span><span id="line-962"> }</span> |
| <span class="source-line-no">963</span><span id="line-963"></span> |
| <span class="source-line-no">964</span><span id="line-964"> /**</span> |
| <span class="source-line-no">965</span><span id="line-965"> * Create one TransitRegionStateProcedure to assign a region w/o specifying a target server. This</span> |
| <span class="source-line-no">966</span><span id="line-966"> * method is called from HBCK2.</span> |
| <span class="source-line-no">967</span><span id="line-967"> * @return an assign or null</span> |
| <span class="source-line-no">968</span><span id="line-968"> */</span> |
| <span class="source-line-no">969</span><span id="line-969"> public TransitRegionStateProcedure createOneAssignProcedure(RegionInfo ri, boolean override,</span> |
| <span class="source-line-no">970</span><span id="line-970"> boolean force) {</span> |
| <span class="source-line-no">971</span><span id="line-971"> TransitRegionStateProcedure trsp = null;</span> |
| <span class="source-line-no">972</span><span id="line-972"> try {</span> |
| <span class="source-line-no">973</span><span id="line-973"> trsp = createAssignProcedure(ri, null, override, force);</span> |
| <span class="source-line-no">974</span><span id="line-974"> } catch (IOException ioe) {</span> |
| <span class="source-line-no">975</span><span id="line-975"> LOG.info(</span> |
| <span class="source-line-no">976</span><span id="line-976"> "Failed {} assign, override={}"</span> |
| <span class="source-line-no">977</span><span id="line-977"> + (override ? "" : "; set override to by-pass state checks."),</span> |
| <span class="source-line-no">978</span><span id="line-978"> ri.getEncodedName(), override, ioe);</span> |
| <span class="source-line-no">979</span><span id="line-979"> }</span> |
| <span class="source-line-no">980</span><span id="line-980"> return trsp;</span> |
| <span class="source-line-no">981</span><span id="line-981"> }</span> |
| <span class="source-line-no">982</span><span id="line-982"></span> |
| <span class="source-line-no">983</span><span id="line-983"> /**</span> |
| <span class="source-line-no">984</span><span id="line-984"> * Create one TransitRegionStateProcedure to unassign a region. This method is called from HBCK2.</span> |
| <span class="source-line-no">985</span><span id="line-985"> * @return an unassign or null</span> |
| <span class="source-line-no">986</span><span id="line-986"> */</span> |
| <span class="source-line-no">987</span><span id="line-987"> public TransitRegionStateProcedure createOneUnassignProcedure(RegionInfo ri, boolean override,</span> |
| <span class="source-line-no">988</span><span id="line-988"> boolean force) {</span> |
| <span class="source-line-no">989</span><span id="line-989"> RegionStateNode regionNode = regionStates.getOrCreateRegionStateNode(ri);</span> |
| <span class="source-line-no">990</span><span id="line-990"> TransitRegionStateProcedure trsp = null;</span> |
| <span class="source-line-no">991</span><span id="line-991"> regionNode.lock();</span> |
| <span class="source-line-no">992</span><span id="line-992"> try {</span> |
| <span class="source-line-no">993</span><span id="line-993"> if (override) {</span> |
| <span class="source-line-no">994</span><span id="line-994"> if (!force) {</span> |
| <span class="source-line-no">995</span><span id="line-995"> preTransitCheck(regionNode, STATES_EXPECTED_ON_UNASSIGN_OR_MOVE);</span> |
| <span class="source-line-no">996</span><span id="line-996"> }</span> |
| <span class="source-line-no">997</span><span id="line-997"> if (regionNode.getProcedure() != null) {</span> |
| <span class="source-line-no">998</span><span id="line-998"> regionNode.unsetProcedure(regionNode.getProcedure());</span> |
| <span class="source-line-no">999</span><span id="line-999"> }</span> |
| <span class="source-line-no">1000</span><span id="line-1000"> } else {</span> |
| <span class="source-line-no">1001</span><span id="line-1001"> // This is where we could throw an exception; i.e. override is false.</span> |
| <span class="source-line-no">1002</span><span id="line-1002"> preTransitCheck(regionNode, STATES_EXPECTED_ON_UNASSIGN_OR_MOVE);</span> |
| <span class="source-line-no">1003</span><span id="line-1003"> }</span> |
| <span class="source-line-no">1004</span><span id="line-1004"> assert regionNode.getProcedure() == null;</span> |
| <span class="source-line-no">1005</span><span id="line-1005"> trsp =</span> |
| <span class="source-line-no">1006</span><span id="line-1006"> TransitRegionStateProcedure.unassign(getProcedureEnvironment(), regionNode.getRegionInfo());</span> |
| <span class="source-line-no">1007</span><span id="line-1007"> regionNode.setProcedure(trsp);</span> |
| <span class="source-line-no">1008</span><span id="line-1008"> } catch (IOException ioe) {</span> |
| <span class="source-line-no">1009</span><span id="line-1009"> // 'override' must be false here.</span> |
| <span class="source-line-no">1010</span><span id="line-1010"> LOG.info("Failed {} unassign, override=false; set override to by-pass state checks.",</span> |
| <span class="source-line-no">1011</span><span id="line-1011"> ri.getEncodedName(), ioe);</span> |
| <span class="source-line-no">1012</span><span id="line-1012"> } finally {</span> |
| <span class="source-line-no">1013</span><span id="line-1013"> regionNode.unlock();</span> |
| <span class="source-line-no">1014</span><span id="line-1014"> }</span> |
| <span class="source-line-no">1015</span><span id="line-1015"> return trsp;</span> |
| <span class="source-line-no">1016</span><span id="line-1016"> }</span> |
| <span class="source-line-no">1017</span><span id="line-1017"></span> |
| <span class="source-line-no">1018</span><span id="line-1018"> /**</span> |
| <span class="source-line-no">1019</span><span id="line-1019"> * Create an array of TransitRegionStateProcedure w/o specifying a target server. Used as fallback</span> |
| <span class="source-line-no">1020</span><span id="line-1020"> * of caller is unable to do {@link #createAssignProcedures(Map)}.</span> |
| <span class="source-line-no">1021</span><span id="line-1021"> * <p/></span> |
| <span class="source-line-no">1022</span><span id="line-1022"> * If no target server, at assign time, we will try to use the former location of the region if</span> |
| <span class="source-line-no">1023</span><span id="line-1023"> * one exists. This is how we 'retain' the old location across a server restart.</span> |
| <span class="source-line-no">1024</span><span id="line-1024"> * <p/></span> |
| <span class="source-line-no">1025</span><span id="line-1025"> * Should only be called when you can make sure that no one can touch these regions other than</span> |
| <span class="source-line-no">1026</span><span id="line-1026"> * you. For example, when you are creating or enabling table. Presumes all Regions are in</span> |
| <span class="source-line-no">1027</span><span id="line-1027"> * appropriate state ripe for assign; no checking of Region state is done in here.</span> |
| <span class="source-line-no">1028</span><span id="line-1028"> * @see #createAssignProcedures(Map)</span> |
| <span class="source-line-no">1029</span><span id="line-1029"> */</span> |
| <span class="source-line-no">1030</span><span id="line-1030"> public TransitRegionStateProcedure[] createAssignProcedures(List<RegionInfo> hris) {</span> |
| <span class="source-line-no">1031</span><span id="line-1031"> return hris.stream().map(hri -> regionStates.getOrCreateRegionStateNode(hri))</span> |
| <span class="source-line-no">1032</span><span id="line-1032"> .map(regionNode -> createAssignProcedure(regionNode, null)).sorted(AssignmentManager::compare)</span> |
| <span class="source-line-no">1033</span><span id="line-1033"> .toArray(TransitRegionStateProcedure[]::new);</span> |
| <span class="source-line-no">1034</span><span id="line-1034"> }</span> |
| <span class="source-line-no">1035</span><span id="line-1035"></span> |
| <span class="source-line-no">1036</span><span id="line-1036"> /**</span> |
| <span class="source-line-no">1037</span><span id="line-1037"> * Tied to {@link #createAssignProcedures(List)} in that it is called if caller is unable to run</span> |
| <span class="source-line-no">1038</span><span id="line-1038"> * this method. Presumes all Regions are in appropriate state ripe for assign; no checking of</span> |
| <span class="source-line-no">1039</span><span id="line-1039"> * Region state is done in here.</span> |
| <span class="source-line-no">1040</span><span id="line-1040"> * @param assignments Map of assignments from which we produce an array of AssignProcedures.</span> |
| <span class="source-line-no">1041</span><span id="line-1041"> * @return Assignments made from the passed in <code>assignments</code></span> |
| <span class="source-line-no">1042</span><span id="line-1042"> * @see #createAssignProcedures(List)</span> |
| <span class="source-line-no">1043</span><span id="line-1043"> */</span> |
| <span class="source-line-no">1044</span><span id="line-1044"> private TransitRegionStateProcedure[]</span> |
| <span class="source-line-no">1045</span><span id="line-1045"> createAssignProcedures(Map<ServerName, List<RegionInfo>> assignments) {</span> |
| <span class="source-line-no">1046</span><span id="line-1046"> return assignments.entrySet().stream()</span> |
| <span class="source-line-no">1047</span><span id="line-1047"> .flatMap(e -> e.getValue().stream().map(hri -> regionStates.getOrCreateRegionStateNode(hri))</span> |
| <span class="source-line-no">1048</span><span id="line-1048"> .map(regionNode -> createAssignProcedure(regionNode, e.getKey())))</span> |
| <span class="source-line-no">1049</span><span id="line-1049"> .sorted(AssignmentManager::compare).toArray(TransitRegionStateProcedure[]::new);</span> |
| <span class="source-line-no">1050</span><span id="line-1050"> }</span> |
| <span class="source-line-no">1051</span><span id="line-1051"></span> |
| <span class="source-line-no">1052</span><span id="line-1052"> // for creating unassign TRSP when disabling a table or closing excess region replicas</span> |
| <span class="source-line-no">1053</span><span id="line-1053"> private TransitRegionStateProcedure forceCreateUnssignProcedure(RegionStateNode regionNode) {</span> |
| <span class="source-line-no">1054</span><span id="line-1054"> regionNode.lock();</span> |
| <span class="source-line-no">1055</span><span id="line-1055"> try {</span> |
| <span class="source-line-no">1056</span><span id="line-1056"> if (regionNode.isInState(State.OFFLINE, State.CLOSED, State.SPLIT)) {</span> |
| <span class="source-line-no">1057</span><span id="line-1057"> return null;</span> |
| <span class="source-line-no">1058</span><span id="line-1058"> }</span> |
| <span class="source-line-no">1059</span><span id="line-1059"> // in general, a split parent should be in CLOSED or SPLIT state, but anyway, let's check it</span> |
| <span class="source-line-no">1060</span><span id="line-1060"> // here for safety</span> |
| <span class="source-line-no">1061</span><span id="line-1061"> if (regionNode.getRegionInfo().isSplit()) {</span> |
| <span class="source-line-no">1062</span><span id="line-1062"> LOG.warn("{} is a split parent but not in CLOSED or SPLIT state", regionNode);</span> |
| <span class="source-line-no">1063</span><span id="line-1063"> return null;</span> |
| <span class="source-line-no">1064</span><span id="line-1064"> }</span> |
| <span class="source-line-no">1065</span><span id="line-1065"> // As in DisableTableProcedure or ModifyTableProcedure, we will hold the xlock for table, so</span> |
| <span class="source-line-no">1066</span><span id="line-1066"> // we can make sure that this procedure has not been executed yet, as TRSP will hold the</span> |
| <span class="source-line-no">1067</span><span id="line-1067"> // shared lock for table all the time. So here we will unset it and when it is actually</span> |
| <span class="source-line-no">1068</span><span id="line-1068"> // executed, it will find that the attach procedure is not itself and quit immediately.</span> |
| <span class="source-line-no">1069</span><span id="line-1069"> if (regionNode.getProcedure() != null) {</span> |
| <span class="source-line-no">1070</span><span id="line-1070"> regionNode.unsetProcedure(regionNode.getProcedure());</span> |
| <span class="source-line-no">1071</span><span id="line-1071"> }</span> |
| <span class="source-line-no">1072</span><span id="line-1072"> return regionNode.setProcedure(TransitRegionStateProcedure.unassign(getProcedureEnvironment(),</span> |
| <span class="source-line-no">1073</span><span id="line-1073"> regionNode.getRegionInfo()));</span> |
| <span class="source-line-no">1074</span><span id="line-1074"> } finally {</span> |
| <span class="source-line-no">1075</span><span id="line-1075"> regionNode.unlock();</span> |
| <span class="source-line-no">1076</span><span id="line-1076"> }</span> |
| <span class="source-line-no">1077</span><span id="line-1077"> }</span> |
| <span class="source-line-no">1078</span><span id="line-1078"></span> |
| <span class="source-line-no">1079</span><span id="line-1079"> /**</span> |
| <span class="source-line-no">1080</span><span id="line-1080"> * Called by DisableTableProcedure to unassign all the regions for a table.</span> |
| <span class="source-line-no">1081</span><span id="line-1081"> */</span> |
| <span class="source-line-no">1082</span><span id="line-1082"> public TransitRegionStateProcedure[] createUnassignProceduresForDisabling(TableName tableName) {</span> |
| <span class="source-line-no">1083</span><span id="line-1083"> return regionStates.getTableRegionStateNodes(tableName).stream()</span> |
| <span class="source-line-no">1084</span><span id="line-1084"> .map(this::forceCreateUnssignProcedure).filter(p -> p != null)</span> |
| <span class="source-line-no">1085</span><span id="line-1085"> .toArray(TransitRegionStateProcedure[]::new);</span> |
| <span class="source-line-no">1086</span><span id="line-1086"> }</span> |
| <span class="source-line-no">1087</span><span id="line-1087"></span> |
| <span class="source-line-no">1088</span><span id="line-1088"> private int submitUnassignProcedure(TableName tableName,</span> |
| <span class="source-line-no">1089</span><span id="line-1089"> Function<RegionStateNode, Boolean> shouldSubmit, Consumer<RegionStateNode> logRIT,</span> |
| <span class="source-line-no">1090</span><span id="line-1090"> Consumer<TransitRegionStateProcedure> submit) {</span> |
| <span class="source-line-no">1091</span><span id="line-1091"> int inTransitionCount = 0;</span> |
| <span class="source-line-no">1092</span><span id="line-1092"> for (RegionStateNode regionNode : regionStates.getTableRegionStateNodes(tableName)) {</span> |
| <span class="source-line-no">1093</span><span id="line-1093"> regionNode.lock();</span> |
| <span class="source-line-no">1094</span><span id="line-1094"> try {</span> |
| <span class="source-line-no">1095</span><span id="line-1095"> if (shouldSubmit.apply(regionNode)) {</span> |
| <span class="source-line-no">1096</span><span id="line-1096"> if (regionNode.isInTransition()) {</span> |
| <span class="source-line-no">1097</span><span id="line-1097"> logRIT.accept(regionNode);</span> |
| <span class="source-line-no">1098</span><span id="line-1098"> inTransitionCount++;</span> |
| <span class="source-line-no">1099</span><span id="line-1099"> continue;</span> |
| <span class="source-line-no">1100</span><span id="line-1100"> }</span> |
| <span class="source-line-no">1101</span><span id="line-1101"> if (regionNode.isInState(State.OFFLINE, State.CLOSED, State.SPLIT)) {</span> |
| <span class="source-line-no">1102</span><span id="line-1102"> continue;</span> |
| <span class="source-line-no">1103</span><span id="line-1103"> }</span> |
| <span class="source-line-no">1104</span><span id="line-1104"> submit.accept(regionNode.setProcedure(TransitRegionStateProcedure</span> |
| <span class="source-line-no">1105</span><span id="line-1105"> .unassign(getProcedureEnvironment(), regionNode.getRegionInfo())));</span> |
| <span class="source-line-no">1106</span><span id="line-1106"> }</span> |
| <span class="source-line-no">1107</span><span id="line-1107"> } finally {</span> |
| <span class="source-line-no">1108</span><span id="line-1108"> regionNode.unlock();</span> |
| <span class="source-line-no">1109</span><span id="line-1109"> }</span> |
| <span class="source-line-no">1110</span><span id="line-1110"> }</span> |
| <span class="source-line-no">1111</span><span id="line-1111"> return inTransitionCount;</span> |
| <span class="source-line-no">1112</span><span id="line-1112"> }</span> |
| <span class="source-line-no">1113</span><span id="line-1113"></span> |
| <span class="source-line-no">1114</span><span id="line-1114"> /**</span> |
| <span class="source-line-no">1115</span><span id="line-1115"> * Called by DisableTableProcedure to unassign all regions for a table. Will skip submit unassign</span> |
| <span class="source-line-no">1116</span><span id="line-1116"> * procedure if the region is in transition, so you may need to call this method multiple times.</span> |
| <span class="source-line-no">1117</span><span id="line-1117"> * @param tableName the table for closing excess region replicas</span> |
| <span class="source-line-no">1118</span><span id="line-1118"> * @param submit for submitting procedure</span> |
| <span class="source-line-no">1119</span><span id="line-1119"> * @return the number of regions in transition that we can not schedule unassign procedures</span> |
| <span class="source-line-no">1120</span><span id="line-1120"> */</span> |
| <span class="source-line-no">1121</span><span id="line-1121"> public int submitUnassignProcedureForDisablingTable(TableName tableName,</span> |
| <span class="source-line-no">1122</span><span id="line-1122"> Consumer<TransitRegionStateProcedure> submit) {</span> |
| <span class="source-line-no">1123</span><span id="line-1123"> return submitUnassignProcedure(tableName, rn -> true,</span> |
| <span class="source-line-no">1124</span><span id="line-1124"> rn -> LOG.debug("skip scheduling unassign procedure for {} when closing table regions "</span> |
| <span class="source-line-no">1125</span><span id="line-1125"> + "for disabling since it is in transition", rn),</span> |
| <span class="source-line-no">1126</span><span id="line-1126"> submit);</span> |
| <span class="source-line-no">1127</span><span id="line-1127"> }</span> |
| <span class="source-line-no">1128</span><span id="line-1128"></span> |
| <span class="source-line-no">1129</span><span id="line-1129"> /**</span> |
| <span class="source-line-no">1130</span><span id="line-1130"> * Called by ModifyTableProcedure to unassign all the excess region replicas for a table. Will</span> |
| <span class="source-line-no">1131</span><span id="line-1131"> * skip submit unassign procedure if the region is in transition, so you may need to call this</span> |
| <span class="source-line-no">1132</span><span id="line-1132"> * method multiple times.</span> |
| <span class="source-line-no">1133</span><span id="line-1133"> * @param tableName the table for closing excess region replicas</span> |
| <span class="source-line-no">1134</span><span id="line-1134"> * @param newReplicaCount the new replica count, should be less than current replica count</span> |
| <span class="source-line-no">1135</span><span id="line-1135"> * @param submit for submitting procedure</span> |
| <span class="source-line-no">1136</span><span id="line-1136"> * @return the number of regions in transition that we can not schedule unassign procedures</span> |
| <span class="source-line-no">1137</span><span id="line-1137"> */</span> |
| <span class="source-line-no">1138</span><span id="line-1138"> public int submitUnassignProcedureForClosingExcessRegionReplicas(TableName tableName,</span> |
| <span class="source-line-no">1139</span><span id="line-1139"> int newReplicaCount, Consumer<TransitRegionStateProcedure> submit) {</span> |
| <span class="source-line-no">1140</span><span id="line-1140"> return submitUnassignProcedure(tableName,</span> |
| <span class="source-line-no">1141</span><span id="line-1141"> rn -> rn.getRegionInfo().getReplicaId() >= newReplicaCount,</span> |
| <span class="source-line-no">1142</span><span id="line-1142"> rn -> LOG.debug("skip scheduling unassign procedure for {} when closing excess region "</span> |
| <span class="source-line-no">1143</span><span id="line-1143"> + "replicas since it is in transition", rn),</span> |
| <span class="source-line-no">1144</span><span id="line-1144"> submit);</span> |
| <span class="source-line-no">1145</span><span id="line-1145"> }</span> |
| <span class="source-line-no">1146</span><span id="line-1146"></span> |
| <span class="source-line-no">1147</span><span id="line-1147"> private int numberOfUnclosedRegions(TableName tableName,</span> |
| <span class="source-line-no">1148</span><span id="line-1148"> Function<RegionStateNode, Boolean> shouldSubmit) {</span> |
| <span class="source-line-no">1149</span><span id="line-1149"> int stillOpen = 0;</span> |
| <span class="source-line-no">1150</span><span id="line-1150"> for (RegionStateNode node : regionStates.getTableRegionStateNodes(tableName)) {</span> |
| <span class="source-line-no">1151</span><span id="line-1151"> // count selected nodes that are not OFFLINE, CLOSED or SPLIT; checked under the node lock</span> |
| <span class="source-line-no">1152</span><span id="line-1152"> node.lock();</span> |
| <span class="source-line-no">1153</span><span id="line-1153"> try {</span> |
| <span class="source-line-no">1154</span><span id="line-1154"> boolean selected = shouldSubmit.apply(node);</span> |
| <span class="source-line-no">1155</span><span id="line-1155"> if (selected && !node.isInState(State.OFFLINE, State.CLOSED, State.SPLIT)) {</span> |
| <span class="source-line-no">1156</span><span id="line-1156"> stillOpen++;</span> |
| <span class="source-line-no">1157</span><span id="line-1157"> }</span> |
| <span class="source-line-no">1158</span><span id="line-1158"> } finally {</span> |
| <span class="source-line-no">1159</span><span id="line-1159"> node.unlock();</span> |
| <span class="source-line-no">1160</span><span id="line-1160"> }</span> |
| <span class="source-line-no">1161</span><span id="line-1161"> }</span> |
| <span class="source-line-no">1162</span><span id="line-1162"> return stillOpen;</span> |
| <span class="source-line-no">1163</span><span id="line-1163"> }</span> |
| <span class="source-line-no">1164</span><span id="line-1164"></span> |
| <span class="source-line-no">1165</span><span id="line-1165"> public int numberOfUnclosedRegionsForDisabling(TableName tableName) {</span> |
| <span class="source-line-no">1166</span><span id="line-1166"> return numberOfUnclosedRegions(tableName, anyNode -> Boolean.TRUE); // no node is filtered out</span> |
| <span class="source-line-no">1167</span><span id="line-1167"> }</span> |
| <span class="source-line-no">1168</span><span id="line-1168"></span> |
| <span class="source-line-no">1169</span><span id="line-1169"> public int numberOfUnclosedExcessRegionReplicas(TableName tableName, int newReplicaCount) {</span> |
| <span class="source-line-no">1170</span><span id="line-1170"> return numberOfUnclosedRegions(tableName, // only replicas with id >= newReplicaCount</span> |
| <span class="source-line-no">1171</span><span id="line-1171"> node -> newReplicaCount <= node.getRegionInfo().getReplicaId());</span> |
| <span class="source-line-no">1172</span><span id="line-1172"> }</span> |
| <span class="source-line-no">1173</span><span id="line-1173"></span> |
| <span class="source-line-no">1174</span><span id="line-1174"> public SplitTableRegionProcedure createSplitProcedure(final RegionInfo regionToSplit,</span> |
| <span class="source-line-no">1175</span><span id="line-1175"> final byte[] splitKey) throws IOException {</span> |
| <span class="source-line-no">1176</span><span id="line-1176"> return new SplitTableRegionProcedure(getProcedureEnvironment(), regionToSplit, splitKey); // only constructed; not submitted here</span> |
| <span class="source-line-no">1177</span><span id="line-1177"> }</span> |
| <span class="source-line-no">1178</span><span id="line-1178"></span> |
| <span class="source-line-no">1179</span><span id="line-1179"> public TruncateRegionProcedure createTruncateRegionProcedure(final RegionInfo regionToTruncate)</span> |
| <span class="source-line-no">1180</span><span id="line-1180"> throws IOException {</span> |
| <span class="source-line-no">1181</span><span id="line-1181"> return new TruncateRegionProcedure(getProcedureEnvironment(), regionToTruncate); // only constructed; not submitted here</span> |
| <span class="source-line-no">1182</span><span id="line-1182"> }</span> |
| <span class="source-line-no">1183</span><span id="line-1183"></span> |
| <span class="source-line-no">1184</span><span id="line-1184"> public MergeTableRegionsProcedure createMergeProcedure(RegionInfo... ris) throws IOException {</span> |
| <span class="source-line-no">1185</span><span id="line-1185"> return new MergeTableRegionsProcedure(getProcedureEnvironment(), ris, false); // NOTE(review): meaning of the trailing false is not visible here - confirm against the constructor</span> |
| <span class="source-line-no">1186</span><span id="line-1186"> }</span> |
| <span class="source-line-no">1187</span><span id="line-1187"></span> |
| <span class="source-line-no">1188</span><span id="line-1188"> /**</span> |
| <span class="source-line-no">1189</span><span id="line-1189"> * Removes every region of the given table, first from the region state store and then from</span> |
| <span class="source-line-no">1190</span><span id="line-1190"> * the in-memory region states. This is called by "DeleteTable".</span> |
| <span class="source-line-no">1191</span><span id="line-1191"> */</span> |
| <span class="source-line-no">1192</span><span id="line-1192"> public void deleteTable(final TableName tableName) throws IOException {</span> |
| <span class="source-line-no">1193</span><span id="line-1193"> final ArrayList<RegionInfo> tableRegions = regionStates.getTableRegionsInfo(tableName);</span> |
| <span class="source-line-no">1194</span><span id="line-1194"> regionStateStore.deleteRegions(tableRegions);</span> |
| <span class="source-line-no">1195</span><span id="line-1195"> for (RegionInfo regionInfo : tableRegions) {</span> |
| <span class="source-line-no">1196</span><span id="line-1196"> regionStates.deleteRegion(regionInfo);</span> |
| <span class="source-line-no">1197</span><span id="line-1197"> }</span> |
| <span class="source-line-no">1198</span><span id="line-1198"> }</span> |
| <span class="source-line-no">1199</span><span id="line-1199"></span> |
| <span class="source-line-no">1200</span><span id="line-1200"> // ============================================================================================</span> |
| <span class="source-line-no">1201</span><span id="line-1201"> // RS Region Transition Report helpers</span> |
| <span class="source-line-no">1202</span><span id="line-1202"> // ============================================================================================</span> |
| <span class="source-line-no">1203</span><span id="line-1203"> private void reportRegionStateTransition(ReportRegionStateTransitionResponse.Builder builder,</span> |
| <span class="source-line-no">1204</span><span id="line-1204"> ServerStateNode serverNode, List<RegionStateTransition> transitionList) throws IOException {</span> |
| <span class="source-line-no">1205</span><span id="line-1205"> // Dispatch each reported transition to the handler matching its transition code.</span> |
| <span class="source-line-no">1206</span><span id="line-1206"> for (RegionStateTransition report : transitionList) {</span> |
| <span class="source-line-no">1207</span><span id="line-1207"> final TransitionCode code = report.getTransitionCode();</span> |
| <span class="source-line-no">1208</span><span id="line-1208"> switch (code) {</span> |
| <span class="source-line-no">1209</span><span id="line-1209"> case OPENED:</span> |
| <span class="source-line-no">1210</span><span id="line-1210"> case FAILED_OPEN:</span> |
| <span class="source-line-no">1211</span><span id="line-1211"> case CLOSED:</span> |
| <span class="source-line-no">1212</span><span id="line-1212"> assert report.getRegionInfoCount() == 1 : report;</span> |
| <span class="source-line-no">1213</span><span id="line-1213"> final RegionInfo hri = ProtobufUtil.toRegionInfo(report.getRegionInfo(0));</span> |
| <span class="source-line-no">1214</span><span id="line-1214"> final long seqNum = report.hasOpenSeqNum() ? report.getOpenSeqNum() : HConstants.NO_SEQNUM;</span> |
| <span class="source-line-no">1215</span><span id="line-1215"> final long procId =</span> |
| <span class="source-line-no">1216</span><span id="line-1216"> report.getProcIdCount() > 0 ? report.getProcId(0) : Procedure.NO_PROC_ID;</span> |
| <span class="source-line-no">1217</span><span id="line-1217"> updateRegionTransition(serverNode, code, hri, seqNum, procId);</span> |
| <span class="source-line-no">1218</span><span id="line-1218"> break;</span> |
| <span class="source-line-no">1219</span><span id="line-1219"> case READY_TO_SPLIT:</span> |
| <span class="source-line-no">1220</span><span id="line-1220"> case SPLIT:</span> |
| <span class="source-line-no">1221</span><span id="line-1221"> case SPLIT_REVERTED:</span> |
| <span class="source-line-no">1222</span><span id="line-1222"> assert report.getRegionInfoCount() == 3 : report;</span> |
| <span class="source-line-no">1223</span><span id="line-1223"> final RegionInfo parent = ProtobufUtil.toRegionInfo(report.getRegionInfo(0));</span> |
| <span class="source-line-no">1224</span><span id="line-1224"> final RegionInfo splitA = ProtobufUtil.toRegionInfo(report.getRegionInfo(1));</span> |
| <span class="source-line-no">1225</span><span id="line-1225"> final RegionInfo splitB = ProtobufUtil.toRegionInfo(report.getRegionInfo(2));</span> |
| <span class="source-line-no">1226</span><span id="line-1226"> updateRegionSplitTransition(serverNode, code, parent, splitA, splitB);</span> |
| <span class="source-line-no">1227</span><span id="line-1227"> break;</span> |
| <span class="source-line-no">1228</span><span id="line-1228"> case READY_TO_MERGE:</span> |
| <span class="source-line-no">1229</span><span id="line-1229"> case MERGED:</span> |
| <span class="source-line-no">1230</span><span id="line-1230"> case MERGE_REVERTED:</span> |
| <span class="source-line-no">1231</span><span id="line-1231"> assert report.getRegionInfoCount() == 3 : report;</span> |
| <span class="source-line-no">1232</span><span id="line-1232"> final RegionInfo merged = ProtobufUtil.toRegionInfo(report.getRegionInfo(0));</span> |
| <span class="source-line-no">1233</span><span id="line-1233"> final RegionInfo mergeA = ProtobufUtil.toRegionInfo(report.getRegionInfo(1));</span> |
| <span class="source-line-no">1234</span><span id="line-1234"> final RegionInfo mergeB = ProtobufUtil.toRegionInfo(report.getRegionInfo(2));</span> |
| <span class="source-line-no">1235</span><span id="line-1235"> updateRegionMergeTransition(serverNode, code, merged, mergeA, mergeB);</span> |
| <span class="source-line-no">1236</span><span id="line-1236"> break;</span> |
| <span class="source-line-no">1237</span><span id="line-1237"> }</span> |
| <span class="source-line-no">1238</span><span id="line-1238"> }</span> |
| <span class="source-line-no">1239</span><span id="line-1239"> }</span> |
| <span class="source-line-no">1240</span><span id="line-1240"></span> |
| <span class="source-line-no">1241</span><span id="line-1241"> public ReportRegionStateTransitionResponse reportRegionStateTransition(</span> |
| <span class="source-line-no">1242</span><span id="line-1242"> final ReportRegionStateTransitionRequest req) throws PleaseHoldException {</span> |
| <span class="source-line-no">1243</span><span id="line-1243"> ReportRegionStateTransitionResponse.Builder response =</span> |
| <span class="source-line-no">1244</span><span id="line-1244"> ReportRegionStateTransitionResponse.newBuilder();</span> |
| <span class="source-line-no">1245</span><span id="line-1245"> ServerName reporter = ProtobufUtil.toServerName(req.getServer());</span> |
| <span class="source-line-no">1246</span><span id="line-1246"> ServerStateNode serverNode = regionStates.getServerNode(reporter);</span> |
| <span class="source-line-no">1247</span><span id="line-1247"> if (serverNode == null) {</span> |
| <span class="source-line-no">1248</span><span id="line-1248"> LOG.warn("No server node for {}", reporter);</span> |
| <span class="source-line-no">1249</span><span id="line-1249"> response.setErrorMessage("No server node for " + reporter);</span> |
| <span class="source-line-no">1250</span><span id="line-1250"> return response.build();</span> |
| <span class="source-line-no">1251</span><span id="line-1251"> }</span> |
| <span class="source-line-no">1252</span><span id="line-1252"> // A read lock is taken here rather than a simple exclusive lock, because we should not block</span> |
| <span class="source-line-no">1253</span><span id="line-1253"> // other reportRegionStateTransition calls from the same region server. This is not only about</span> |
| <span class="source-line-no">1254</span><span id="line-1254"> // performance but also about preventing a dead lock: if the meta region is on the same region</span> |
| <span class="source-line-no">1255</span><span id="line-1255"> // server and we held a lock that blocks the reportRegionStateTransition for meta, then, since</span> |
| <span class="source-line-no">1256</span><span id="line-1256"> // meta is not online, we would block inside the lock protection waiting for meta to come</span> |
| <span class="source-line-no">1257</span><span id="line-1257"> // online...</span> |
| <span class="source-line-no">1258</span><span id="line-1258"> serverNode.readLock().lock();</span> |
| <span class="source-line-no">1259</span><span id="line-1259"> try {</span> |
| <span class="source-line-no">1260</span><span id="line-1260"> // reports are accepted only while the region server is online, see the comment above in</span> |
| <span class="source-line-no">1261</span><span id="line-1261"> // the submitServerCrash method and HBASE-21508 for more details.</span> |
| <span class="source-line-no">1262</span><span id="line-1262"> if (!serverNode.isInState(ServerState.ONLINE)) {</span> |
| <span class="source-line-no">1263</span><span id="line-1263"> LOG.warn("The region server {} is already dead, skip reportRegionStateTransition call",</span> |
| <span class="source-line-no">1264</span><span id="line-1264"> reporter);</span> |
| <span class="source-line-no">1265</span><span id="line-1265"> response.setErrorMessage("You are dead");</span> |
| <span class="source-line-no">1266</span><span id="line-1266"> } else {</span> |
| <span class="source-line-no">1267</span><span id="line-1267"> try {</span> |
| <span class="source-line-no">1268</span><span id="line-1268"> reportRegionStateTransition(response, serverNode, req.getTransitionList());</span> |
| <span class="source-line-no">1269</span><span id="line-1269"> } catch (PleaseHoldException hold) {</span> |
| <span class="source-line-no">1270</span><span id="line-1270"> LOG.trace("Failed transition ", hold);</span> |
| <span class="source-line-no">1271</span><span id="line-1271"> throw hold;</span> |
| <span class="source-line-no">1272</span><span id="line-1272"> } catch (UnsupportedOperationException | IOException failure) {</span> |
| <span class="source-line-no">1273</span><span id="line-1273"> // TODO: the response carries a single error message for now, and the RS will abort</span> |
| <span class="source-line-no">1274</span><span id="line-1274"> // if the master says that one of the region transitions failed.</span> |
| <span class="source-line-no">1275</span><span id="line-1275"> LOG.warn("Failed transition", failure);</span> |
| <span class="source-line-no">1276</span><span id="line-1276"> response.setErrorMessage("Failed transition " + failure.getMessage());</span> |
| <span class="source-line-no">1277</span><span id="line-1277"> }</span> |
| <span class="source-line-no">1278</span><span id="line-1278"> }</span> |
| <span class="source-line-no">1279</span><span id="line-1279"> } finally {</span> |
| <span class="source-line-no">1280</span><span id="line-1280"> serverNode.readLock().unlock();</span> |
| <span class="source-line-no">1281</span><span id="line-1281"> }</span> |
| <span class="source-line-no">1282</span><span id="line-1282"></span> |
| <span class="source-line-no">1283</span><span id="line-1283"> return response.build();</span> |
| <span class="source-line-no">1284</span><span id="line-1284"> }</span> |
| <span class="source-line-no">1285</span><span id="line-1285"></span> |
| <span class="source-line-no">1286</span><span id="line-1286"> private void updateRegionTransition(ServerStateNode serverNode, TransitionCode state,</span> |
| <span class="source-line-no">1287</span><span id="line-1287"> RegionInfo regionInfo, long seqId, long procId) throws IOException {</span> |
| <span class="source-line-no">1288</span><span id="line-1288"> checkMetaLoaded(regionInfo);</span> |
| <span class="source-line-no">1289</span><span id="line-1289"></span> |
| <span class="source-line-no">1290</span><span id="line-1290"> RegionStateNode regionNode = regionStates.getRegionStateNode(regionInfo);</span> |
| <span class="source-line-no">1291</span><span id="line-1291"> if (regionNode == null) {</span> |
| <span class="source-line-no">1292</span><span id="line-1292"> // the table/region is gone. maybe a delete, split, merge</span> |
| <span class="source-line-no">1293</span><span id="line-1293"> throw new UnexpectedStateException(String.format(</span> |
| <span class="source-line-no">1294</span><span id="line-1294"> "Server %s was trying to transition region %s to %s. but Region is not known.",</span> |
| <span class="source-line-no">1295</span><span id="line-1295"> serverNode.getServerName(), regionInfo, state));</span> |
| <span class="source-line-no">1296</span><span id="line-1296"> }</span> |
| <span class="source-line-no">1297</span><span id="line-1297"> LOG.trace("Update region transition serverName={} region={} regionState={}",</span> |
| <span class="source-line-no">1298</span><span id="line-1298"> serverNode.getServerName(), regionNode, state);</span> |
| <span class="source-line-no">1299</span><span id="line-1299"></span> |
| <span class="source-line-no">1300</span><span id="line-1300"> regionNode.lock();</span> |
| <span class="source-line-no">1301</span><span id="line-1301"> try {</span> |
| <span class="source-line-no">1302</span><span id="line-1302"> if (!reportTransition(regionNode, serverNode, state, seqId, procId)) {</span> |
| <span class="source-line-no">1303</span><span id="line-1303"> // Don't log WARN if shutting down cluster; during shutdown. Avoid the below messages:</span> |
| <span class="source-line-no">1304</span><span id="line-1304"> // 2018-08-13 10:45:10,551 WARN ...AssignmentManager: No matching procedure found for</span> |
| <span class="source-line-no">1305</span><span id="line-1305"> // rit=OPEN, location=ve0538.halxg.cloudera.com,16020,1533493000958,</span> |
| <span class="source-line-no">1306</span><span id="line-1306"> // table=IntegrationTestBigLinkedList, region=65ab289e2fc1530df65f6c3d7cde7aa5 transition</span> |
| <span class="source-line-no">1307</span><span id="line-1307"> // to CLOSED</span> |
| <span class="source-line-no">1308</span><span id="line-1308"> // These happen because on cluster shutdown, we currently let the RegionServers close</span> |
| <span class="source-line-no">1309</span><span id="line-1309"> // regions. This is the only time that region close is not run by the Master (so cluster</span> |
| <span class="source-line-no">1310</span><span id="line-1310"> // goes down fast). Consider changing it so Master runs all shutdowns.</span> |
| <span class="source-line-no">1311</span><span id="line-1311"> if (</span> |
| <span class="source-line-no">1312</span><span id="line-1312"> this.master.getServerManager().isClusterShutdown() && state.equals(TransitionCode.CLOSED)</span> |
| <span class="source-line-no">1313</span><span id="line-1313"> ) {</span> |
| <span class="source-line-no">1314</span><span id="line-1314"> LOG.info("RegionServer {} {}", state, regionNode.getRegionInfo().getEncodedName());</span> |
| <span class="source-line-no">1315</span><span id="line-1315"> } else {</span> |
| <span class="source-line-no">1316</span><span id="line-1316"> LOG.warn("No matching procedure found for {} transition on {} to {}",</span> |
| <span class="source-line-no">1317</span><span id="line-1317"> serverNode.getServerName(), regionNode, state);</span> |
| <span class="source-line-no">1318</span><span id="line-1318"> }</span> |
| <span class="source-line-no">1319</span><span id="line-1319"> }</span> |
| <span class="source-line-no">1320</span><span id="line-1320"> } finally {</span> |
| <span class="source-line-no">1321</span><span id="line-1321"> regionNode.unlock();</span> |
| <span class="source-line-no">1322</span><span id="line-1322"> }</span> |
| <span class="source-line-no">1323</span><span id="line-1323"> }</span> |
| <span class="source-line-no">1324</span><span id="line-1324"></span> |
| <span class="source-line-no">1325</span><span id="line-1325"> private boolean reportTransition(RegionStateNode regionNode, ServerStateNode serverNode,</span> |
| <span class="source-line-no">1326</span><span id="line-1326"> TransitionCode state, long seqId, long procId) throws IOException {</span> |
| <span class="source-line-no">1327</span><span id="line-1327"> ServerName serverName = serverNode.getServerName();</span> |
| <span class="source-line-no">1328</span><span id="line-1328"> TransitRegionStateProcedure proc = regionNode.getProcedure();</span> |
| <span class="source-line-no">1329</span><span id="line-1329"> if (proc == null) {</span> |
| <span class="source-line-no">1330</span><span id="line-1330"> return false;</span> |
| <span class="source-line-no">1331</span><span id="line-1331"> }</span> |
| <span class="source-line-no">1332</span><span id="line-1332"> proc.reportTransition(master.getMasterProcedureExecutor().getEnvironment(), regionNode,</span> |
| <span class="source-line-no">1333</span><span id="line-1333"> serverName, state, seqId, procId);</span> |
| <span class="source-line-no">1334</span><span id="line-1334"> return true;</span> |
| <span class="source-line-no">1335</span><span id="line-1335"> }</span> |
| <span class="source-line-no">1336</span><span id="line-1336"></span> |
| <span class="source-line-no">1337</span><span id="line-1337"> private void updateRegionSplitTransition(final ServerStateNode serverNode,</span> |
| <span class="source-line-no">1338</span><span id="line-1338"> final TransitionCode state, final RegionInfo parent, final RegionInfo hriA,</span> |
| <span class="source-line-no">1339</span><span id="line-1339"> final RegionInfo hriB) throws IOException {</span> |
| <span class="source-line-no">1340</span><span id="line-1340"> checkMetaLoaded(parent);</span> |
| <span class="source-line-no">1341</span><span id="line-1341"></span> |
| <span class="source-line-no">1342</span><span id="line-1342"> if (TransitionCode.READY_TO_SPLIT != state) {</span> |
| <span class="source-line-no">1343</span><span id="line-1343"> throw new UnexpectedStateException(</span> |
| <span class="source-line-no">1344</span><span id="line-1344"> "unsupported split regionState=" + state + " for parent region " + parent</span> |
| <span class="source-line-no">1345</span><span id="line-1345"> + " maybe an old RS (< 2.0) had the operation in progress");</span> |
| <span class="source-line-no">1346</span><span id="line-1346"> }</span> |
| <span class="source-line-no">1347</span><span id="line-1347"></span> |
| <span class="source-line-no">1348</span><span id="line-1348"> // the daughters must be adjacent: hriA has to end exactly where hriB starts</span> |
| <span class="source-line-no">1349</span><span id="line-1349"> if (!Bytes.equals(hriA.getEndKey(), hriB.getStartKey())) {</span> |
| <span class="source-line-no">1350</span><span id="line-1350"> throw new UnsupportedOperationException("unsupported split request with bad keys: parent="</span> |
| <span class="source-line-no">1351</span><span id="line-1351"> + parent + " hriA=" + hriA + " hriB=" + hriB);</span> |
| <span class="source-line-no">1352</span><span id="line-1352"> }</span> |
| <span class="source-line-no">1353</span><span id="line-1353"></span> |
| <span class="source-line-no">1354</span><span id="line-1354"> if (!master.isSplitOrMergeEnabled(MasterSwitchType.SPLIT)) {</span> |
| <span class="source-line-no">1355</span><span id="line-1355"> LOG.warn("Split switch is off! skip split of " + parent);</span> |
| <span class="source-line-no">1356</span><span id="line-1356"> throw new DoNotRetryIOException(</span> |
| <span class="source-line-no">1357</span><span id="line-1357"> "Split region " + parent.getRegionNameAsString() + " failed due to split switch off");</span> |
| <span class="source-line-no">1358</span><span id="line-1358"> }</span> |
| <span class="source-line-no">1359</span><span id="line-1359"></span> |
| <span class="source-line-no">1360</span><span id="line-1360"> final ServerName reporter = serverNode.getServerName();</span> |
| <span class="source-line-no">1361</span><span id="line-1361"> final byte[] splitPoint = hriB.getStartKey();</span> |
| <span class="source-line-no">1362</span><span id="line-1362"> if (LOG.isDebugEnabled()) {</span> |
| <span class="source-line-no">1363</span><span id="line-1363"> LOG.debug("Split request from {}, parent={}, splitKey={}", reporter, parent,</span> |
| <span class="source-line-no">1364</span><span id="line-1364"> Bytes.toStringBinary(splitPoint));</span> |
| <span class="source-line-no">1365</span><span id="line-1365"> }</span> |
| <span class="source-line-no">1366</span><span id="line-1366"> // This report is processed asynchronously from other activities which can mutate the</span> |
| <span class="source-line-no">1367</span><span id="line-1367"> // region state. For example, a split procedure may already be running for this parent.</span> |
| <span class="source-line-no">1368</span><span id="line-1368"> // A split procedure cannot succeed if the parent region is no longer open, so the request</span> |
| <span class="source-line-no">1369</span><span id="line-1369"> // is ignored in that case.</span> |
| <span class="source-line-no">1370</span><span id="line-1370"> // Note that submitting more than one split procedure for a given region is</span> |
| <span class="source-line-no">1371</span><span id="line-1371"> // harmless -- the split is fenced in the procedure handling -- but it would be noisy in</span> |
| <span class="source-line-no">1372</span><span id="line-1372"> // the logs. Only one procedure can succeed. The other procedure(s) would abort during</span> |
| <span class="source-line-no">1373</span><span id="line-1373"> // initialization and report failure with WARN level logging.</span> |
| <span class="source-line-no">1374</span><span id="line-1374"> final RegionState current = regionStates.getRegionState(parent);</span> |
| <span class="source-line-no">1375</span><span id="line-1375"> if (current == null || !current.isOpened()) {</span> |
| <span class="source-line-no">1376</span><span id="line-1376"> LOG.info("Ignoring split request from {}, parent={} because parent is unknown or not open",</span> |
| <span class="source-line-no">1377</span><span id="line-1377"> reporter, parent);</span> |
| <span class="source-line-no">1378</span><span id="line-1378"> return;</span> |
| <span class="source-line-no">1379</span><span id="line-1379"> }</span> |
| <span class="source-line-no">1380</span><span id="line-1380"> // Submit the Split procedure</span> |
| <span class="source-line-no">1381</span><span id="line-1381"> master.getMasterProcedureExecutor().submitProcedure(createSplitProcedure(parent, splitPoint));</span> |
| <span class="source-line-no">1382</span><span id="line-1382"></span> |
| <span class="source-line-no">1383</span><span id="line-1383"> // A RS older than 2.0 gets an exception so that it aborts its own split; the master handles it</span> |
| <span class="source-line-no">1384</span><span id="line-1384"> if (master.getServerManager().getVersionNumber(reporter) < 0x0200000) {</span> |
| <span class="source-line-no">1385</span><span id="line-1385"> throw new UnsupportedOperationException(</span> |
| <span class="source-line-no">1386</span><span id="line-1386"> String.format("Split handled by the master: parent=%s hriA=%s hriB=%s",</span> |
| <span class="source-line-no">1387</span><span id="line-1387"> parent.getShortNameToLog(), hriA, hriB));</span> |
| <span class="source-line-no">1388</span><span id="line-1388"> }</span> |
| <span class="source-line-no">1389</span><span id="line-1389"> }</span> |
| <span class="source-line-no">1390</span><span id="line-1390"></span> |
| <span class="source-line-no">1391</span><span id="line-1391"> private void updateRegionMergeTransition(final ServerStateNode serverNode,</span> |
| <span class="source-line-no">1392</span><span id="line-1392"> final TransitionCode state, final RegionInfo merged, final RegionInfo hriA,</span> |
| <span class="source-line-no">1393</span><span id="line-1393"> final RegionInfo hriB) throws IOException {</span> |
| <span class="source-line-no">1394</span><span id="line-1394"> // Handles a merge report from a region server: validates it, then submits a merge procedure.</span> |
| <span class="source-line-no">1395</span><span id="line-1395"> checkMetaLoaded(merged);</span> |
| <span class="source-line-no">1396</span><span id="line-1396"> if (state != TransitionCode.READY_TO_MERGE) {</span> |
| <span class="source-line-no">1397</span><span id="line-1397"> throw new UnexpectedStateException(</span> |
| <span class="source-line-no">1398</span><span id="line-1398"> "Unsupported merge regionState=" + state + " for regionA=" + hriA + " regionB=" + hriB</span> |
| <span class="source-line-no">1399</span><span id="line-1399"> + " merged=" + merged + " maybe an old RS (< 2.0) had the operation in progress");</span> |
| <span class="source-line-no">1400</span><span id="line-1400"> }</span> |
| <span class="source-line-no">1401</span><span id="line-1401"></span> |
| <span class="source-line-no">1402</span><span id="line-1402"> if (!master.isSplitOrMergeEnabled(MasterSwitchType.MERGE)) {</span> |
| <span class="source-line-no">1403</span><span id="line-1403"> LOG.warn("Merge switch is off! skip merge of regionA=" + hriA + " regionB=" + hriB);</span> |
| <span class="source-line-no">1404</span><span id="line-1404"> throw new DoNotRetryIOException(</span> |
| <span class="source-line-no">1405</span><span id="line-1405"> "Merge of regionA=" + hriA + " regionB=" + hriB + " failed because merge switch is off");</span> |
| <span class="source-line-no">1406</span><span id="line-1406"> }</span> |
| <span class="source-line-no">1407</span><span id="line-1407"></span> |
| <span class="source-line-no">1408</span><span id="line-1408"> // Name the reporting region server in the debug line so the request can be traced to its</span> |
| <span class="source-line-no">1409</span><span id="line-1409"> // sender. The parameterized call defers formatting, so no isDebugEnabled() guard is needed.</span> |
| <span class="source-line-no">1410</span><span id="line-1410"> LOG.debug("Handling merge request from RS={}, merged={}", serverNode.getServerName(), merged);</span> |
| <span class="source-line-no">1411</span><span id="line-1411"> // Submit the Merge procedure</span> |
| <span class="source-line-no">1412</span><span id="line-1412"> master.getMasterProcedureExecutor().submitProcedure(createMergeProcedure(hriA, hriB));</span> |
| <span class="source-line-no">1413</span><span id="line-1413"></span> |
| <span class="source-line-no">1414</span><span id="line-1414"> // If the RS is < 2.0 throw an exception to abort the operation, we are handling the merge</span> |
| <span class="source-line-no">1415</span><span id="line-1415"> if (master.getServerManager().getVersionNumber(serverNode.getServerName()) < 0x0200000) {</span> |
| <span class="source-line-no">1416</span><span id="line-1416"> throw new UnsupportedOperationException(</span> |
| <span class="source-line-no">1417</span><span id="line-1417"> String.format("Merge not handled yet: regionState=%s merged=%s hriA=%s hriB=%s", state,</span> |
| <span class="source-line-no">1418</span><span id="line-1418"> merged, hriA, hriB));</span> |
| <span class="source-line-no">1419</span><span id="line-1419"> }</span> |
| <span class="source-line-no">1420</span><span id="line-1420"> }</span> |
| <span class="source-line-no">1421</span><span id="line-1421"></span> |
| <span class="source-line-no">1422</span><span id="line-1422"> // ============================================================================================</span> |
| <span class="source-line-no">1423</span><span id="line-1423"> // RS Status update (report online regions) helpers</span> |
| <span class="source-line-no">1424</span><span id="line-1424"> // ============================================================================================</span> |
| <span class="source-line-no">1425</span><span id="line-1425"> /**</span> |
| <span class="source-line-no">1426</span><span id="line-1426"> * The master will call this method when the RS sends the regionServerReport(). The report will</span> |
| <span class="source-line-no">1427</span><span id="line-1427"> * contain the "online regions". This method will check the online regions against the</span> |
| <span class="source-line-no">1428</span><span id="line-1428"> * in-memory state of the AM, and we will log a warn message if there is a mismatch. This is</span> |
| <span class="source-line-no">1429</span><span id="line-1429"> * because there is no fencing between the reportRegionStateTransition method and the</span> |
| <span class="source-line-no">1430</span><span id="line-1430"> * regionServerReport method, so there could be a race that introduces an inconsistency here,</span> |
| <span class="source-line-no">1431</span><span id="line-1431"> * but actually there is no problem.</span> |
| <span class="source-line-no">1432</span><span id="line-1432"> * <p/></span> |
| <span class="source-line-no">1433</span><span id="line-1433"> * Please see HBASE-21421 and HBASE-21463 for more details.</span> |
| <span class="source-line-no">1434</span><span id="line-1434"> */</span> |
| <span class="source-line-no">1435</span><span id="line-1435"> public void reportOnlineRegions(ServerName serverName, Set<byte[]> regionNames) {</span> |
| <span class="source-line-no">1436</span><span id="line-1436"> if (!isRunning()) {</span> |
| <span class="source-line-no">1437</span><span id="line-1437"> return;</span> |
| <span class="source-line-no">1438</span><span id="line-1438"> }</span> |
| <span class="source-line-no">1439</span><span id="line-1439"> if (LOG.isTraceEnabled()) {</span> |
| <span class="source-line-no">1440</span><span id="line-1440"> LOG.trace("ReportOnlineRegions {} regionCount={}, metaLoaded={} {}", serverName,</span> |
| <span class="source-line-no">1441</span><span id="line-1441"> regionNames.size(), isMetaLoaded(),</span> |
| <span class="source-line-no">1442</span><span id="line-1442"> regionNames.stream().map(Bytes::toStringBinary).collect(Collectors.toList()));</span> |
| <span class="source-line-no">1443</span><span id="line-1443"> }</span> |
| <span class="source-line-no">1444</span><span id="line-1444"></span> |
| <span class="source-line-no">1445</span><span id="line-1445"> ServerStateNode serverNode = regionStates.getServerNode(serverName);</span> |
| <span class="source-line-no">1446</span><span id="line-1446"> if (serverNode == null) {</span> |
| <span class="source-line-no">1447</span><span id="line-1447"> LOG.warn("Got a report from server {} where its server node is null", serverName);</span> |
| <span class="source-line-no">1448</span><span id="line-1448"> return;</span> |
| <span class="source-line-no">1449</span><span id="line-1449"> }</span> |
| <span class="source-line-no">1450</span><span id="line-1450"> serverNode.readLock().lock();</span> |
| <span class="source-line-no">1451</span><span id="line-1451"> try {</span> |
| <span class="source-line-no">1452</span><span id="line-1452"> if (!serverNode.isInState(ServerState.ONLINE)) {</span> |
| <span class="source-line-no">1453</span><span id="line-1453"> LOG.warn("Got a report from a server result in state {}", serverNode);</span> |
| <span class="source-line-no">1454</span><span id="line-1454"> return;</span> |
| <span class="source-line-no">1455</span><span id="line-1455"> }</span> |
| <span class="source-line-no">1456</span><span id="line-1456"> } finally {</span> |
| <span class="source-line-no">1457</span><span id="line-1457"> serverNode.readLock().unlock();</span> |
| <span class="source-line-no">1458</span><span id="line-1458"> }</span> |
| <span class="source-line-no">1459</span><span id="line-1459"></span> |
| <span class="source-line-no">1460</span><span id="line-1460"> // Track the regionserver reported online regions in memory.</span> |
| <span class="source-line-no">1461</span><span id="line-1461"> synchronized (rsReports) {</span> |
| <span class="source-line-no">1462</span><span id="line-1462"> rsReports.put(serverName, regionNames);</span> |
| <span class="source-line-no">1463</span><span id="line-1463"> }</span> |
| <span class="source-line-no">1464</span><span id="line-1464"></span> |
| <span class="source-line-no">1465</span><span id="line-1465"> if (regionNames.isEmpty()) {</span> |
| <span class="source-line-no">1466</span><span id="line-1466"> // nothing to do if we don't have regions</span> |
| <span class="source-line-no">1467</span><span id="line-1467"> LOG.trace("no online region found on {}", serverName);</span> |
| <span class="source-line-no">1468</span><span id="line-1468"> return;</span> |
| <span class="source-line-no">1469</span><span id="line-1469"> }</span> |
| <span class="source-line-no">1470</span><span id="line-1470"> if (!isMetaLoaded()) {</span> |
| <span class="source-line-no">1471</span><span id="line-1471"> // we are still on startup, skip checking</span> |
| <span class="source-line-no">1472</span><span id="line-1472"> return;</span> |
| <span class="source-line-no">1473</span><span id="line-1473"> }</span> |
| <span class="source-line-no">1474</span><span id="line-1474"> // The heartbeat tells us which regions are on the region server; check the state.</span> |
| <span class="source-line-no">1475</span><span id="line-1475"> checkOnlineRegionsReport(serverNode, regionNames);</span> |
| <span class="source-line-no">1476</span><span id="line-1476"> }</span> |
| <span class="source-line-no">1477</span><span id="line-1477"></span> |
| <span class="source-line-no">1478</span><span id="line-1478"> /**</span> |
| <span class="source-line-no">1479</span><span id="line-1479"> * Close <code>regionName</code> on <code>sn</code> silently and immediately without using a</span> |
| <span class="source-line-no">1480</span><span id="line-1480"> * Procedure or going via hbase:meta. For the case where a RegionServer's hosting of a Region is not</span> |
| <span class="source-line-no">1481</span><span id="line-1481"> * aligned w/ the Master's accounting of Region state. This is for cleaning up an error in</span> |
| <span class="source-line-no">1482</span><span id="line-1482"> * accounting.</span> |
| <span class="source-line-no">1483</span><span id="line-1483"> */</span> |
| <span class="source-line-no">1484</span><span id="line-1484"> private void closeRegionSilently(ServerName sn, byte[] regionName) {</span> |
| <span class="source-line-no">1485</span><span id="line-1485"> try {</span> |
| <span class="source-line-no">1486</span><span id="line-1486"> RegionInfo ri = CatalogFamilyFormat.parseRegionInfoFromRegionName(regionName);</span> |
| <span class="source-line-no">1487</span><span id="line-1487"> // Pass -1 for timeout. Means do not wait.</span> |
| <span class="source-line-no">1488</span><span id="line-1488"> ServerManager.closeRegionSilentlyAndWait(this.master.getAsyncClusterConnection(), sn, ri, -1);</span> |
| <span class="source-line-no">1489</span><span id="line-1489"> } catch (Exception e) {</span> |
| <span class="source-line-no">1490</span><span id="line-1490"> LOG.error("Failed trying to close {} on {}", Bytes.toStringBinary(regionName), sn, e);</span> |
| <span class="source-line-no">1491</span><span id="line-1491"> }</span> |
| <span class="source-line-no">1492</span><span id="line-1492"> }</span> |
| <span class="source-line-no">1493</span><span id="line-1493"></span> |
| <span class="source-line-no">1494</span><span id="line-1494"> /**</span> |
| <span class="source-line-no">1495</span><span id="line-1495"> * Check that what the RegionServer reports aligns with the Master's image. If disagreement, we</span> |
| <span class="source-line-no">1496</span><span id="line-1496"> * will tell the RegionServer to expediently close a Region we do not think it should have.</span> |
| <span class="source-line-no">1497</span><span id="line-1497"> */</span> |
| <span class="source-line-no">1498</span><span id="line-1498"> private void checkOnlineRegionsReport(ServerStateNode serverNode, Set<byte[]> regionNames) {</span> |
| <span class="source-line-no">1499</span><span id="line-1499"> ServerName serverName = serverNode.getServerName();</span> |
| <span class="source-line-no">1500</span><span id="line-1500"> for (byte[] regionName : regionNames) {</span> |
| <span class="source-line-no">1501</span><span id="line-1501"> if (!isRunning()) {</span> |
| <span class="source-line-no">1502</span><span id="line-1502"> return;</span> |
| <span class="source-line-no">1503</span><span id="line-1503"> }</span> |
| <span class="source-line-no">1504</span><span id="line-1504"> RegionStateNode regionNode = regionStates.getRegionStateNodeFromName(regionName);</span> |
| <span class="source-line-no">1505</span><span id="line-1505"> if (regionNode == null) {</span> |
| <span class="source-line-no">1506</span><span id="line-1506"> String regionNameAsStr = Bytes.toStringBinary(regionName);</span> |
| <span class="source-line-no">1507</span><span id="line-1507"> LOG.warn("No RegionStateNode for {} but reported as up on {}; closing...", regionNameAsStr,</span> |
| <span class="source-line-no">1508</span><span id="line-1508"> serverName);</span> |
| <span class="source-line-no">1509</span><span id="line-1509"> closeRegionSilently(serverNode.getServerName(), regionName);</span> |
| <span class="source-line-no">1510</span><span id="line-1510"> continue;</span> |
| <span class="source-line-no">1511</span><span id="line-1511"> }</span> |
| <span class="source-line-no">1512</span><span id="line-1512"> final long lag = 1000;</span> |
| <span class="source-line-no">1513</span><span id="line-1513"> // This is just a fallback check designed to identify unexpected data inconsistencies, so we</span> |
| <span class="source-line-no">1514</span><span id="line-1514"> // use tryLock to attempt to acquire the lock, and if the lock cannot be acquired, we skip the</span> |
| <span class="source-line-no">1515</span><span id="line-1515"> // check. This will not cause any additional problems and also prevents the regionServerReport</span> |
| <span class="source-line-no">1516</span><span id="line-1516"> // call from being stuck for too long which may cause deadlock on region assignment.</span> |
| <span class="source-line-no">1517</span><span id="line-1517"> if (regionNode.tryLock()) {</span> |
| <span class="source-line-no">1518</span><span id="line-1518"> try {</span> |
| <span class="source-line-no">1519</span><span id="line-1519"> long diff = EnvironmentEdgeManager.currentTime() - regionNode.getLastUpdate();</span> |
| <span class="source-line-no">1520</span><span id="line-1520"> if (regionNode.isInState(State.OPENING, State.OPEN)) {</span> |
| <span class="source-line-no">1521</span><span id="line-1521"> // This is possible as a region server has just closed a region but the region server</span> |
| <span class="source-line-no">1522</span><span id="line-1522"> // report was generated before the closing and arrived after the closing. Make sure</span> |
| <span class="source-line-no">1523</span><span id="line-1523"> // there is some elapsed time,</span> |
| <span class="source-line-no">1524</span><span id="line-1524"> // so there are fewer false alarms.</span> |
| <span class="source-line-no">1525</span><span id="line-1525"> if (!regionNode.getRegionLocation().equals(serverName) && diff > lag) {</span> |
| <span class="source-line-no">1526</span><span id="line-1526"> LOG.warn("Reporting {} server does not match {} (time since last "</span> |
| <span class="source-line-no">1527</span><span id="line-1527"> + "update={}ms); closing...", serverName, regionNode, diff);</span> |
| <span class="source-line-no">1528</span><span id="line-1528"> closeRegionSilently(serverNode.getServerName(), regionName);</span> |
| <span class="source-line-no">1529</span><span id="line-1529"> }</span> |
| <span class="source-line-no">1530</span><span id="line-1530"> } else if (!regionNode.isInState(State.CLOSING, State.SPLITTING)) {</span> |
| <span class="source-line-no">1531</span><span id="line-1531"> // So, we can get a report that a region is CLOSED or SPLIT because a heartbeat</span> |
| <span class="source-line-no">1532</span><span id="line-1532"> // came in at about the same time as a region transition. Make sure there is some</span> |
| <span class="source-line-no">1533</span><span id="line-1533"> // elapsed time, so there are fewer false alarms.</span> |
| <span class="source-line-no">1534</span><span id="line-1534"> if (diff > lag) {</span> |
| <span class="source-line-no">1535</span><span id="line-1535"> LOG.warn("Reporting {} state does not match {} (time since last update={}ms)",</span> |
| <span class="source-line-no">1536</span><span id="line-1536"> serverName, regionNode, diff);</span> |
| <span class="source-line-no">1537</span><span id="line-1537"> }</span> |
| <span class="source-line-no">1538</span><span id="line-1538"> }</span> |
| <span class="source-line-no">1539</span><span id="line-1539"> } finally {</span> |
| <span class="source-line-no">1540</span><span id="line-1540"> regionNode.unlock();</span> |
| <span class="source-line-no">1541</span><span id="line-1541"> }</span> |
| <span class="source-line-no">1542</span><span id="line-1542"> } else {</span> |
| <span class="source-line-no">1543</span><span id="line-1543"> LOG.warn(</span> |
| <span class="source-line-no">1544</span><span id="line-1544"> "Unable to acquire lock for regionNode {}. It is likely that another thread is currently holding the lock. To avoid deadlock, skip execution for now.",</span> |
| <span class="source-line-no">1545</span><span id="line-1545"> regionNode);</span> |
| <span class="source-line-no">1546</span><span id="line-1546"> }</span> |
| <span class="source-line-no">1547</span><span id="line-1547"> }</span> |
| <span class="source-line-no">1548</span><span id="line-1548"> }</span> |
| <span class="source-line-no">1549</span><span id="line-1549"></span> |
| <span class="source-line-no">1550</span><span id="line-1550"> // ============================================================================================</span> |
| <span class="source-line-no">1551</span><span id="line-1551"> // RIT chore</span> |
| <span class="source-line-no">1552</span><span id="line-1552"> // ============================================================================================</span> |
| <span class="source-line-no">1553</span><span id="line-1553"> private static class RegionInTransitionChore extends ProcedureInMemoryChore<MasterProcedureEnv> {</span> |
| <span class="source-line-no">1554</span><span id="line-1554"> public RegionInTransitionChore(final int timeoutMsec) {</span> |
| <span class="source-line-no">1555</span><span id="line-1555"> super(timeoutMsec);</span> |
| <span class="source-line-no">1556</span><span id="line-1556"> }</span> |
| <span class="source-line-no">1557</span><span id="line-1557"></span> |
| <span class="source-line-no">1558</span><span id="line-1558"> @Override</span> |
| <span class="source-line-no">1559</span><span id="line-1559"> protected void periodicExecute(final MasterProcedureEnv env) {</span> |
| <span class="source-line-no">1560</span><span id="line-1560"> final AssignmentManager am = env.getAssignmentManager();</span> |
| <span class="source-line-no">1561</span><span id="line-1561"></span> |
| <span class="source-line-no">1562</span><span id="line-1562"> final RegionInTransitionStat ritStat = am.computeRegionInTransitionStat();</span> |
| <span class="source-line-no">1563</span><span id="line-1563"> if (ritStat.hasRegionsOverThreshold()) {</span> |
| <span class="source-line-no">1564</span><span id="line-1564"> for (RegionState hri : ritStat.getRegionOverThreshold()) {</span> |
| <span class="source-line-no">1565</span><span id="line-1565"> am.handleRegionOverStuckWarningThreshold(hri.getRegion());</span> |
| <span class="source-line-no">1566</span><span id="line-1566"> }</span> |
| <span class="source-line-no">1567</span><span id="line-1567"> }</span> |
| <span class="source-line-no">1568</span><span id="line-1568"></span> |
| <span class="source-line-no">1569</span><span id="line-1569"> // update metrics</span> |
| <span class="source-line-no">1570</span><span id="line-1570"> am.updateRegionsInTransitionMetrics(ritStat);</span> |
| <span class="source-line-no">1571</span><span id="line-1571"> }</span> |
| <span class="source-line-no">1572</span><span id="line-1572"> }</span> |
| <span class="source-line-no">1573</span><span id="line-1573"></span> |
| <span class="source-line-no">1574</span><span id="line-1574"> private static class DeadServerMetricRegionChore</span> |
| <span class="source-line-no">1575</span><span id="line-1575"> extends ProcedureInMemoryChore<MasterProcedureEnv> {</span> |
| <span class="source-line-no">1576</span><span id="line-1576"> public DeadServerMetricRegionChore(final int timeoutMsec) {</span> |
| <span class="source-line-no">1577</span><span id="line-1577"> super(timeoutMsec);</span> |
| <span class="source-line-no">1578</span><span id="line-1578"> }</span> |
| <span class="source-line-no">1579</span><span id="line-1579"></span> |
| <span class="source-line-no">1580</span><span id="line-1580"> @Override</span> |
| <span class="source-line-no">1581</span><span id="line-1581"> protected void periodicExecute(final MasterProcedureEnv env) {</span> |
| <span class="source-line-no">1582</span><span id="line-1582"> final ServerManager sm = env.getMasterServices().getServerManager();</span> |
| <span class="source-line-no">1583</span><span id="line-1583"> final AssignmentManager am = env.getAssignmentManager();</span> |
| <span class="source-line-no">1584</span><span id="line-1584"> // To minimize inconsistencies we are not going to snapshot live servers in advance in case</span> |
| <span class="source-line-no">1585</span><span id="line-1585"> // new servers are added; OTOH we don't want to add heavy sync for a consistent view since</span> |
| <span class="source-line-no">1586</span><span id="line-1586"> // this is for metrics. Instead, we're going to check each region as we go; to avoid making</span> |
| <span class="source-line-no">1587</span><span id="line-1587"> // too many checks, we maintain a local list of servers, limiting us to false negatives. If</span> |
| <span class="source-line-no">1588</span><span id="line-1588"> // we miss some recently-dead server, we'll just see it next time.</span> |
| <span class="source-line-no">1589</span><span id="line-1589"> Set<ServerName> recentlyLiveServers = new HashSet<>();</span> |
| <span class="source-line-no">1590</span><span id="line-1590"> int deadRegions = 0, unknownRegions = 0;</span> |
| <span class="source-line-no">1591</span><span id="line-1591"> for (RegionStateNode rsn : am.getRegionStates().getRegionStateNodes()) {</span> |
| <span class="source-line-no">1592</span><span id="line-1592"> if (rsn.getState() != State.OPEN) {</span> |
| <span class="source-line-no">1593</span><span id="line-1593"> continue; // Opportunistic check, should quickly skip RITs, offline tables, etc.</span> |
| <span class="source-line-no">1594</span><span id="line-1594"> }</span> |
| <span class="source-line-no">1595</span><span id="line-1595"> // Do not need to acquire region state lock as this is only for showing metrics.</span> |
| <span class="source-line-no">1596</span><span id="line-1596"> ServerName sn = rsn.getRegionLocation();</span> |
| <span class="source-line-no">1597</span><span id="line-1597"> State state = rsn.getState();</span> |
| <span class="source-line-no">1598</span><span id="line-1598"> if (state != State.OPEN) {</span> |
| <span class="source-line-no">1599</span><span id="line-1599"> continue; // Mostly skipping RITs that are already being taken care of.</span> |
| <span class="source-line-no">1600</span><span id="line-1600"> }</span> |
| <span class="source-line-no">1601</span><span id="line-1601"> if (sn == null) {</span> |
| <span class="source-line-no">1602</span><span id="line-1602"> ++unknownRegions; // Opened on null?</span> |
| <span class="source-line-no">1603</span><span id="line-1603"> continue;</span> |
| <span class="source-line-no">1604</span><span id="line-1604"> }</span> |
| <span class="source-line-no">1605</span><span id="line-1605"> if (recentlyLiveServers.contains(sn)) {</span> |
| <span class="source-line-no">1606</span><span id="line-1606"> continue;</span> |
| <span class="source-line-no">1607</span><span id="line-1607"> }</span> |
| <span class="source-line-no">1608</span><span id="line-1608"> ServerManager.ServerLiveState sls = sm.isServerKnownAndOnline(sn);</span> |
| <span class="source-line-no">1609</span><span id="line-1609"> switch (sls) {</span> |
| <span class="source-line-no">1610</span><span id="line-1610"> case LIVE:</span> |
| <span class="source-line-no">1611</span><span id="line-1611"> recentlyLiveServers.add(sn);</span> |
| <span class="source-line-no">1612</span><span id="line-1612"> break;</span> |
| <span class="source-line-no">1613</span><span id="line-1613"> case DEAD:</span> |
| <span class="source-line-no">1614</span><span id="line-1614"> ++deadRegions;</span> |
| <span class="source-line-no">1615</span><span id="line-1615"> break;</span> |
| <span class="source-line-no">1616</span><span id="line-1616"> case UNKNOWN:</span> |
| <span class="source-line-no">1617</span><span id="line-1617"> ++unknownRegions;</span> |
| <span class="source-line-no">1618</span><span id="line-1618"> break;</span> |
| <span class="source-line-no">1619</span><span id="line-1619"> default:</span> |
| <span class="source-line-no">1620</span><span id="line-1620"> throw new AssertionError("Unexpected " + sls);</span> |
| <span class="source-line-no">1621</span><span id="line-1621"> }</span> |
| <span class="source-line-no">1622</span><span id="line-1622"> }</span> |
| <span class="source-line-no">1623</span><span id="line-1623"> if (deadRegions > 0 || unknownRegions > 0) {</span> |
| <span class="source-line-no">1624</span><span id="line-1624"> LOG.info("Found {} OPEN regions on dead servers and {} OPEN regions on unknown servers",</span> |
| <span class="source-line-no">1625</span><span id="line-1625"> deadRegions, unknownRegions);</span> |
| <span class="source-line-no">1626</span><span id="line-1626"> }</span> |
| <span class="source-line-no">1627</span><span id="line-1627"></span> |
| <span class="source-line-no">1628</span><span id="line-1628"> am.updateDeadServerRegionMetrics(deadRegions, unknownRegions);</span> |
| <span class="source-line-no">1629</span><span id="line-1629"> }</span> |
| <span class="source-line-no">1630</span><span id="line-1630"> }</span> |
| <span class="source-line-no">1631</span><span id="line-1631"></span> |
| <span class="source-line-no">1632</span><span id="line-1632"> public RegionInTransitionStat computeRegionInTransitionStat() {</span> |
| <span class="source-line-no">1633</span><span id="line-1633"> final RegionInTransitionStat rit = new RegionInTransitionStat(getConfiguration());</span> |
| <span class="source-line-no">1634</span><span id="line-1634"> rit.update(this);</span> |
| <span class="source-line-no">1635</span><span id="line-1635"> return rit;</span> |
| <span class="source-line-no">1636</span><span id="line-1636"> }</span> |
| <span class="source-line-no">1637</span><span id="line-1637"></span> |
| <span class="source-line-no">1638</span><span id="line-1638"> public static class RegionInTransitionStat {</span> |
| <span class="source-line-no">1639</span><span id="line-1639"> private final int ritThreshold;</span> |
| <span class="source-line-no">1640</span><span id="line-1640"></span> |
| <span class="source-line-no">1641</span><span id="line-1641"> private HashMap<String, RegionState> ritsOverThreshold = null;</span> |
| <span class="source-line-no">1642</span><span id="line-1642"> private long statTimestamp;</span> |
| <span class="source-line-no">1643</span><span id="line-1643"> private long oldestRITTime = 0;</span> |
| <span class="source-line-no">1644</span><span id="line-1644"> private int totalRITsTwiceThreshold = 0;</span> |
| <span class="source-line-no">1645</span><span id="line-1645"> private int totalRITs = 0;</span> |
| <span class="source-line-no">1646</span><span id="line-1646"></span> |
| <span class="source-line-no">1647</span><span id="line-1647"> public RegionInTransitionStat(final Configuration conf) {</span> |
| <span class="source-line-no">1648</span><span id="line-1648"> this.ritThreshold =</span> |
| <span class="source-line-no">1649</span><span id="line-1649"> conf.getInt(METRICS_RIT_STUCK_WARNING_THRESHOLD, DEFAULT_RIT_STUCK_WARNING_THRESHOLD);</span> |
| <span class="source-line-no">1650</span><span id="line-1650"> }</span> |
| <span class="source-line-no">1651</span><span id="line-1651"></span> |
| <span class="source-line-no">1652</span><span id="line-1652"> public int getRITThreshold() {</span> |
| <span class="source-line-no">1653</span><span id="line-1653"> return ritThreshold;</span> |
| <span class="source-line-no">1654</span><span id="line-1654"> }</span> |
| <span class="source-line-no">1655</span><span id="line-1655"></span> |
| <span class="source-line-no">1656</span><span id="line-1656"> public long getTimestamp() {</span> |
| <span class="source-line-no">1657</span><span id="line-1657"> return statTimestamp;</span> |
| <span class="source-line-no">1658</span><span id="line-1658"> }</span> |
| <span class="source-line-no">1659</span><span id="line-1659"></span> |
| <span class="source-line-no">1660</span><span id="line-1660"> public int getTotalRITs() {</span> |
| <span class="source-line-no">1661</span><span id="line-1661"> return totalRITs;</span> |
| <span class="source-line-no">1662</span><span id="line-1662"> }</span> |
| <span class="source-line-no">1663</span><span id="line-1663"></span> |
| <span class="source-line-no">1664</span><span id="line-1664"> public long getOldestRITTime() {</span> |
| <span class="source-line-no">1665</span><span id="line-1665"> return oldestRITTime;</span> |
| <span class="source-line-no">1666</span><span id="line-1666"> }</span> |
| <span class="source-line-no">1667</span><span id="line-1667"></span> |
| <span class="source-line-no">1668</span><span id="line-1668"> public int getTotalRITsOverThreshold() {</span> |
| <span class="source-line-no">1669</span><span id="line-1669"> Map<String, RegionState> m = this.ritsOverThreshold;</span> |
| <span class="source-line-no">1670</span><span id="line-1670"> return m != null ? m.size() : 0;</span> |
| <span class="source-line-no">1671</span><span id="line-1671"> }</span> |
| <span class="source-line-no">1672</span><span id="line-1672"></span> |
| <span class="source-line-no">1673</span><span id="line-1673"> public boolean hasRegionsTwiceOverThreshold() {</span> |
| <span class="source-line-no">1674</span><span id="line-1674"> return totalRITsTwiceThreshold > 0;</span> |
| <span class="source-line-no">1675</span><span id="line-1675"> }</span> |
| <span class="source-line-no">1676</span><span id="line-1676"></span> |
| <span class="source-line-no">1677</span><span id="line-1677"> public boolean hasRegionsOverThreshold() {</span> |
| <span class="source-line-no">1678</span><span id="line-1678"> Map<String, RegionState> m = this.ritsOverThreshold;</span> |
| <span class="source-line-no">1679</span><span id="line-1679"> return m != null && !m.isEmpty();</span> |
| <span class="source-line-no">1680</span><span id="line-1680"> }</span> |
| <span class="source-line-no">1681</span><span id="line-1681"></span> |
| <span class="source-line-no">1682</span><span id="line-1682"> public Collection<RegionState> getRegionOverThreshold() {</span> |
| <span class="source-line-no">1683</span><span id="line-1683"> Map<String, RegionState> m = this.ritsOverThreshold;</span> |
| <span class="source-line-no">1684</span><span id="line-1684"> return m != null ? m.values() : Collections.emptySet();</span> |
| <span class="source-line-no">1685</span><span id="line-1685"> }</span> |
| <span class="source-line-no">1686</span><span id="line-1686"></span> |
| <span class="source-line-no">1687</span><span id="line-1687"> public boolean isRegionOverThreshold(final RegionInfo regionInfo) {</span> |
| <span class="source-line-no">1688</span><span id="line-1688"> Map<String, RegionState> m = this.ritsOverThreshold;</span> |
| <span class="source-line-no">1689</span><span id="line-1689"> return m != null && m.containsKey(regionInfo.getEncodedName());</span> |
| <span class="source-line-no">1690</span><span id="line-1690"> }</span> |
| <span class="source-line-no">1691</span><span id="line-1691"></span> |
| <span class="source-line-no">1692</span><span id="line-1692"> public boolean isRegionTwiceOverThreshold(final RegionInfo regionInfo) {</span> |
| <span class="source-line-no">1693</span><span id="line-1693"> Map<String, RegionState> m = this.ritsOverThreshold;</span> |
| <span class="source-line-no">1694</span><span id="line-1694"> if (m == null) {</span> |
| <span class="source-line-no">1695</span><span id="line-1695"> return false;</span> |
| <span class="source-line-no">1696</span><span id="line-1696"> }</span> |
| <span class="source-line-no">1697</span><span id="line-1697"> final RegionState state = m.get(regionInfo.getEncodedName());</span> |
| <span class="source-line-no">1698</span><span id="line-1698"> if (state == null) {</span> |
| <span class="source-line-no">1699</span><span id="line-1699"> return false;</span> |
| <span class="source-line-no">1700</span><span id="line-1700"> }</span> |
| <span class="source-line-no">1701</span><span id="line-1701"> return (statTimestamp - state.getStamp()) > (ritThreshold * 2);</span> |
| <span class="source-line-no">1702</span><span id="line-1702"> }</span> |
| <span class="source-line-no">1703</span><span id="line-1703"></span> |
| <span class="source-line-no">1704</span><span id="line-1704"> protected void update(final AssignmentManager am) {</span> |
| <span class="source-line-no">1705</span><span id="line-1705"> final RegionStates regionStates = am.getRegionStates();</span> |
| <span class="source-line-no">1706</span><span id="line-1706"> this.statTimestamp = EnvironmentEdgeManager.currentTime();</span> |
| <span class="source-line-no">1707</span><span id="line-1707"> update(regionStates.getRegionsStateInTransition(), statTimestamp);</span> |
| <span class="source-line-no">1708</span><span id="line-1708"> update(regionStates.getRegionFailedOpen(), statTimestamp);</span> |
| <span class="source-line-no">1709</span><span id="line-1709"></span> |
| <span class="source-line-no">1710</span><span id="line-1710"> if (LOG.isDebugEnabled() && ritsOverThreshold != null && !ritsOverThreshold.isEmpty()) {</span> |
| <span class="source-line-no">1711</span><span id="line-1711"> LOG.debug("RITs over threshold: {}",</span> |
| <span class="source-line-no">1712</span><span id="line-1712"> ritsOverThreshold.entrySet().stream()</span> |
| <span class="source-line-no">1713</span><span id="line-1713"> .map(e -> e.getKey() + ":" + e.getValue().getState().name())</span> |
| <span class="source-line-no">1714</span><span id="line-1714"> .collect(Collectors.joining("\n")));</span> |
| <span class="source-line-no">1715</span><span id="line-1715"> }</span> |
| <span class="source-line-no">1716</span><span id="line-1716"> }</span> |
| <span class="source-line-no">1717</span><span id="line-1717"></span> |
| <span class="source-line-no">1718</span><span id="line-1718"> private void update(final Collection<RegionState> regions, final long currentTime) {</span> |
| <span class="source-line-no">1719</span><span id="line-1719"> for (RegionState state : regions) {</span> |
| <span class="source-line-no">1720</span><span id="line-1720"> totalRITs++;</span> |
| <span class="source-line-no">1721</span><span id="line-1721"> final long ritStartedMs = state.getStamp();</span> |
| <span class="source-line-no">1722</span><span id="line-1722"> if (ritStartedMs == 0) {</span> |
| <span class="source-line-no">1723</span><span id="line-1723"> // Don't output bogus values to metrics if they accidentally make it here.</span> |
| <span class="source-line-no">1724</span><span id="line-1724"> LOG.warn("The RIT {} has no start time", state.getRegion());</span> |
| <span class="source-line-no">1725</span><span id="line-1725"> continue;</span> |
| <span class="source-line-no">1726</span><span id="line-1726"> }</span> |
| <span class="source-line-no">1727</span><span id="line-1727"> final long ritTime = currentTime - ritStartedMs;</span> |
| <span class="source-line-no">1728</span><span id="line-1728"> if (ritTime > ritThreshold) {</span> |
| <span class="source-line-no">1729</span><span id="line-1729"> if (ritsOverThreshold == null) {</span> |
| <span class="source-line-no">1730</span><span id="line-1730"> ritsOverThreshold = new HashMap<String, RegionState>();</span> |
| <span class="source-line-no">1731</span><span id="line-1731"> }</span> |
| <span class="source-line-no">1732</span><span id="line-1732"> ritsOverThreshold.put(state.getRegion().getEncodedName(), state);</span> |
| <span class="source-line-no">1733</span><span id="line-1733"> totalRITsTwiceThreshold += (ritTime > (ritThreshold * 2)) ? 1 : 0;</span> |
| <span class="source-line-no">1734</span><span id="line-1734"> }</span> |
| <span class="source-line-no">1735</span><span id="line-1735"> if (oldestRITTime < ritTime) {</span> |
| <span class="source-line-no">1736</span><span id="line-1736"> oldestRITTime = ritTime;</span> |
| <span class="source-line-no">1737</span><span id="line-1737"> }</span> |
| <span class="source-line-no">1738</span><span id="line-1738"> }</span> |
| <span class="source-line-no">1739</span><span id="line-1739"> }</span> |
| <span class="source-line-no">1740</span><span id="line-1740"> }</span> |
| <span class="source-line-no">1741</span><span id="line-1741"></span> |
| <span class="source-line-no">1742</span><span id="line-1742"> private void updateRegionsInTransitionMetrics(final RegionInTransitionStat ritStat) {</span> |
| <span class="source-line-no">1743</span><span id="line-1743"> metrics.updateRITOldestAge(ritStat.getOldestRITTime());</span> |
| <span class="source-line-no">1744</span><span id="line-1744"> metrics.updateRITCount(ritStat.getTotalRITs());</span> |
| <span class="source-line-no">1745</span><span id="line-1745"> metrics.updateRITCountOverThreshold(ritStat.getTotalRITsOverThreshold());</span> |
| <span class="source-line-no">1746</span><span id="line-1746"> }</span> |
| <span class="source-line-no">1747</span><span id="line-1747"></span> |
| <span class="source-line-no">1748</span><span id="line-1748"> private void updateDeadServerRegionMetrics(int deadRegions, int unknownRegions) {</span> |
| <span class="source-line-no">1749</span><span id="line-1749"> metrics.updateDeadServerOpenRegions(deadRegions);</span> |
| <span class="source-line-no">1750</span><span id="line-1750"> metrics.updateUnknownServerOpenRegions(unknownRegions);</span> |
| <span class="source-line-no">1751</span><span id="line-1751"> }</span> |
| <span class="source-line-no">1752</span><span id="line-1752"></span> |
| <span class="source-line-no">1753</span><span id="line-1753"> private void handleRegionOverStuckWarningThreshold(final RegionInfo regionInfo) {</span> |
| <span class="source-line-no">1754</span><span id="line-1754"> final RegionStateNode regionNode = regionStates.getRegionStateNode(regionInfo);</span> |
| <span class="source-line-no">1755</span><span id="line-1755"> // if (regionNode.isStuck()) {</span> |
| <span class="source-line-no">1756</span><span id="line-1756"> LOG.warn("STUCK Region-In-Transition {}", regionNode);</span> |
| <span class="source-line-no">1757</span><span id="line-1757"> }</span> |
| <span class="source-line-no">1758</span><span id="line-1758"></span> |
| <span class="source-line-no">1759</span><span id="line-1759"> // ============================================================================================</span> |
| <span class="source-line-no">1760</span><span id="line-1760"> // TODO: Master load/bootstrap</span> |
| <span class="source-line-no">1761</span><span id="line-1761"> // ============================================================================================</span> |
| <span class="source-line-no">1762</span><span id="line-1762"> public void joinCluster() throws IOException {</span> |
| <span class="source-line-no">1763</span><span id="line-1763"> long startTime = System.nanoTime();</span> |
| <span class="source-line-no">1764</span><span id="line-1764"> LOG.debug("Joining cluster...");</span> |
| <span class="source-line-no">1765</span><span id="line-1765"></span> |
| <span class="source-line-no">1766</span><span id="line-1766"> // Scan hbase:meta to build list of existing regions, servers, and assignment.</span> |
| <span class="source-line-no">1767</span><span id="line-1767"> // hbase:meta is online now or will be. Inside loadMeta, we keep trying. Can't make progress</span> |
| <span class="source-line-no">1768</span><span id="line-1768"> // w/o meta.</span> |
| <span class="source-line-no">1769</span><span id="line-1769"> loadMeta();</span> |
| <span class="source-line-no">1770</span><span id="line-1770"></span> |
| <span class="source-line-no">1771</span><span id="line-1771"> while (master.getServerManager().countOfRegionServers() < 1) {</span> |
| <span class="source-line-no">1772</span><span id="line-1772"> LOG.info("Waiting for RegionServers to join; current count={}",</span> |
| <span class="source-line-no">1773</span><span id="line-1773"> master.getServerManager().countOfRegionServers());</span> |
| <span class="source-line-no">1774</span><span id="line-1774"> Threads.sleep(250);</span> |
| <span class="source-line-no">1775</span><span id="line-1775"> }</span> |
| <span class="source-line-no">1776</span><span id="line-1776"> LOG.info("Number of RegionServers={}", master.getServerManager().countOfRegionServers());</span> |
| <span class="source-line-no">1777</span><span id="line-1777"></span> |
| <span class="source-line-no">1778</span><span id="line-1778"> // Start the chores</span> |
| <span class="source-line-no">1779</span><span id="line-1779"> master.getMasterProcedureExecutor().addChore(this.ritChore);</span> |
| <span class="source-line-no">1780</span><span id="line-1780"> master.getMasterProcedureExecutor().addChore(this.deadMetricChore);</span> |
| <span class="source-line-no">1781</span><span id="line-1781"></span> |
| <span class="source-line-no">1782</span><span id="line-1782"> long costMs = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTime);</span> |
| <span class="source-line-no">1783</span><span id="line-1783"> LOG.info("Joined the cluster in {}", StringUtils.humanTimeDiff(costMs));</span> |
| <span class="source-line-no">1784</span><span id="line-1784"> }</span> |
| <span class="source-line-no">1785</span><span id="line-1785"></span> |
| <span class="source-line-no">1786</span><span id="line-1786"> /**</span> |
| <span class="source-line-no">1787</span><span id="line-1787"> * Create assign procedure for offline regions. Just follow the old</span> |
| <span class="source-line-no">1788</span><span id="line-1788"> * processofflineServersWithOnlineRegions method. Since now we do not need to deal with dead</span> |
| <span class="source-line-no">1789</span><span id="line-1789"> * server any more, we only deal with the regions in OFFLINE state in this method. And this is a</span> |
| <span class="source-line-no">1790</span><span id="line-1790"> * bit strange, that for new regions, we will add them in CLOSED state instead of OFFLINE state, and</span> |
| <span class="source-line-no">1791</span><span id="line-1791"> * usually there will be a procedure to track them. The processofflineServersWithOnlineRegions is</span> |
| <span class="source-line-no">1792</span><span id="line-1792"> * a legacy from long ago, as things are really different now, maybe we do not need this</span> |
| <span class="source-line-no">1793</span><span id="line-1793"> * method any more. Need to revisit later.</span> |
| <span class="source-line-no">1794</span><span id="line-1794"> */</span> |
| <span class="source-line-no">1795</span><span id="line-1795"> // Public so can be run by the Master as part of the startup. Needs hbase:meta to be online.</span> |
| <span class="source-line-no">1796</span><span id="line-1796"> // Needs to be done after the table state manager has been started.</span> |
| <span class="source-line-no">1797</span><span id="line-1797"> public void processOfflineRegions() {</span> |
| <span class="source-line-no">1798</span><span id="line-1798"> TransitRegionStateProcedure[] procs =</span> |
| <span class="source-line-no">1799</span><span id="line-1799"> regionStates.getRegionStateNodes().stream().filter(rsn -> rsn.isInState(State.OFFLINE))</span> |
| <span class="source-line-no">1800</span><span id="line-1800"> .filter(rsn -> isTableEnabled(rsn.getRegionInfo().getTable())).map(rsn -> {</span> |
| <span class="source-line-no">1801</span><span id="line-1801"> rsn.lock();</span> |
| <span class="source-line-no">1802</span><span id="line-1802"> try {</span> |
| <span class="source-line-no">1803</span><span id="line-1803"> if (rsn.getProcedure() != null) {</span> |
| <span class="source-line-no">1804</span><span id="line-1804"> return null;</span> |
| <span class="source-line-no">1805</span><span id="line-1805"> } else {</span> |
| <span class="source-line-no">1806</span><span id="line-1806"> return rsn.setProcedure(TransitRegionStateProcedure.assign(getProcedureEnvironment(),</span> |
| <span class="source-line-no">1807</span><span id="line-1807"> rsn.getRegionInfo(), null));</span> |
| <span class="source-line-no">1808</span><span id="line-1808"> }</span> |
| <span class="source-line-no">1809</span><span id="line-1809"> } finally {</span> |
| <span class="source-line-no">1810</span><span id="line-1810"> rsn.unlock();</span> |
| <span class="source-line-no">1811</span><span id="line-1811"> }</span> |
| <span class="source-line-no">1812</span><span id="line-1812"> }).filter(p -> p != null).toArray(TransitRegionStateProcedure[]::new);</span> |
| <span class="source-line-no">1813</span><span id="line-1813"> if (procs.length > 0) {</span> |
| <span class="source-line-no">1814</span><span id="line-1814"> master.getMasterProcedureExecutor().submitProcedures(procs);</span> |
| <span class="source-line-no">1815</span><span id="line-1815"> }</span> |
| <span class="source-line-no">1816</span><span id="line-1816"> }</span> |
| <span class="source-line-no">1817</span><span id="line-1817"></span> |
| <span class="source-line-no">1818</span><span id="line-1818"> /*</span> |
| <span class="source-line-no">1819</span><span id="line-1819"> * AM internal RegionStateStore.RegionStateVisitor implementation. To be used when scanning META</span> |
| <span class="source-line-no">1820</span><span id="line-1820"> * table for region rows, using RegionStateStore utility methods. RegionStateStore methods will</span> |
| <span class="source-line-no">1821</span><span id="line-1821"> * convert Result into proper RegionInfo instances, but those would still need to be added into</span> |
| <span class="source-line-no">1822</span><span id="line-1822"> * AssignmentManager.regionStates in-memory cache. RegionMetaLoadingVisitor.visitRegionState</span> |
| <span class="source-line-no">1823</span><span id="line-1823"> * method provides the logic for adding RegionInfo instances as loaded from latest META scan into</span> |
| <span class="source-line-no">1824</span><span id="line-1824"> * AssignmentManager.regionStates.</span> |
| <span class="source-line-no">1825</span><span id="line-1825"> */</span> |
| <span class="source-line-no">1826</span><span id="line-1826"> private class RegionMetaLoadingVisitor implements RegionStateStore.RegionStateVisitor {</span> |
| <span class="source-line-no">1827</span><span id="line-1827"></span> |
| <span class="source-line-no">1828</span><span id="line-1828"> @Override</span> |
| <span class="source-line-no">1829</span><span id="line-1829"> public void visitRegionState(Result result, final RegionInfo regionInfo, final State state,</span> |
| <span class="source-line-no">1830</span><span id="line-1830"> final ServerName regionLocation, final ServerName lastHost, final long openSeqNum) {</span> |
| <span class="source-line-no">1831</span><span id="line-1831"> if (</span> |
| <span class="source-line-no">1832</span><span id="line-1832"> state == null && regionLocation == null && lastHost == null</span> |
| <span class="source-line-no">1833</span><span id="line-1833"> && openSeqNum == SequenceId.NO_SEQUENCE_ID</span> |
| <span class="source-line-no">1834</span><span id="line-1834"> ) {</span> |
| <span class="source-line-no">1835</span><span id="line-1835"> // This is a row with nothing in it.</span> |
| <span class="source-line-no">1836</span><span id="line-1836"> LOG.warn("Skipping empty row={}", result);</span> |
| <span class="source-line-no">1837</span><span id="line-1837"> return;</span> |
| <span class="source-line-no">1838</span><span id="line-1838"> }</span> |
| <span class="source-line-no">1839</span><span id="line-1839"> State localState = state;</span> |
| <span class="source-line-no">1840</span><span id="line-1840"> if (localState == null) {</span> |
| <span class="source-line-no">1841</span><span id="line-1841"> // No region state column data in hbase:meta table! Am I doing a rolling upgrade from</span> |
| <span class="source-line-no">1842</span><span id="line-1842"> // hbase1 to hbase2? Am I restoring a SNAPSHOT or otherwise adding a region to hbase:meta?</span> |
| <span class="source-line-no">1843</span><span id="line-1843"> // In any of these cases, state is empty. For now, presume OFFLINE but there are probably</span> |
| <span class="source-line-no">1844</span><span id="line-1844"> // cases where we need to probe more to be sure this is correct; TODO informed by experience.</span> |
| <span class="source-line-no">1845</span><span id="line-1845"> LOG.info(regionInfo.getEncodedName() + " regionState=null; presuming " + State.OFFLINE);</span> |
| <span class="source-line-no">1846</span><span id="line-1846"> localState = State.OFFLINE;</span> |
| <span class="source-line-no">1847</span><span id="line-1847"> }</span> |
| <span class="source-line-no">1848</span><span id="line-1848"> RegionStateNode regionNode = regionStates.getOrCreateRegionStateNode(regionInfo);</span> |
| <span class="source-line-no">1849</span><span id="line-1849"> // Do not need to lock on regionNode, as we can make sure that before we finish loading</span> |
| <span class="source-line-no">1850</span><span id="line-1850"> // meta, all the related procedures can not be executed. The only exception is for meta</span> |
| <span class="source-line-no">1851</span><span id="line-1851"> // region related operations, but here we do not load the information for meta region.</span> |
| <span class="source-line-no">1852</span><span id="line-1852"> regionNode.setState(localState);</span> |
| <span class="source-line-no">1853</span><span id="line-1853"> regionNode.setLastHost(lastHost);</span> |
| <span class="source-line-no">1854</span><span id="line-1854"> regionNode.setRegionLocation(regionLocation);</span> |
| <span class="source-line-no">1855</span><span id="line-1855"> regionNode.setOpenSeqNum(openSeqNum);</span> |
| <span class="source-line-no">1856</span><span id="line-1856"></span> |
| <span class="source-line-no">1857</span><span id="line-1857"> // Note: keep consistent with other methods, see region(Opening|Opened|Closing)</span> |
| <span class="source-line-no">1858</span><span id="line-1858"> // RIT/ServerCrash handling should take care of the transiting regions.</span> |
| <span class="source-line-no">1859</span><span id="line-1859"> if (</span> |
| <span class="source-line-no">1860</span><span id="line-1860"> localState.matches(State.OPEN, State.OPENING, State.CLOSING, State.SPLITTING, State.MERGING)</span> |
| <span class="source-line-no">1861</span><span id="line-1861"> ) {</span> |
| <span class="source-line-no">1862</span><span id="line-1862"> assert regionLocation != null : "found null region location for " + regionNode;</span> |
| <span class="source-line-no">1863</span><span id="line-1863"> // TODO: this could lead to some orphan server state nodes, as it is possible that the</span> |
| <span class="source-line-no">1864</span><span id="line-1864"> // region server is already dead and its SCP has already finished but we have</span> |
| <span class="source-line-no">1865</span><span id="line-1865"> // persisted an opening state on this region server. Finally the TRSP will assign the</span> |
| <span class="source-line-no">1866</span><span id="line-1866"> // region to another region server, so it will not cause critical problems, just waste</span> |
| <span class="source-line-no">1867</span><span id="line-1867"> // some memory as no one will try to cleanup these orphan server state nodes.</span> |
| <span class="source-line-no">1868</span><span id="line-1868"> regionStates.createServer(regionLocation);</span> |
| <span class="source-line-no">1869</span><span id="line-1869"> regionStates.addRegionToServer(regionNode);</span> |
| <span class="source-line-no">1870</span><span id="line-1870"> } else if (localState == State.OFFLINE || regionInfo.isOffline()) {</span> |
| <span class="source-line-no">1871</span><span id="line-1871"> regionStates.addToOfflineRegions(regionNode);</span> |
| <span class="source-line-no">1872</span><span id="line-1872"> }</span> |
| <span class="source-line-no">1873</span><span id="line-1873"> if (regionNode.getProcedure() != null) {</span> |
| <span class="source-line-no">1874</span><span id="line-1874"> regionNode.getProcedure().stateLoaded(AssignmentManager.this, regionNode);</span> |
| <span class="source-line-no">1875</span><span id="line-1875"> }</span> |
| <span class="source-line-no">1876</span><span id="line-1876"> }</span> |
| <span class="source-line-no">1877</span><span id="line-1877"> };</span> |
| <span class="source-line-no">1878</span><span id="line-1878"></span> |
| <span class="source-line-no">1879</span><span id="line-1879"> /**</span> |
| <span class="source-line-no">1880</span><span id="line-1880"> * Attempt to load {@code regionInfo} from META, adding any results to the</span> |
| <span class="source-line-no">1881</span><span id="line-1881"> * {@link #regionStateStore}. Is NOT aware of replica regions.</span> |
| <span class="source-line-no">1882</span><span id="line-1882"> * @param regionInfo the region to be loaded from META.</span> |
| <span class="source-line-no">1883</span><span id="line-1883"> * @throws IOException If some error occurs while querying META or parsing results.</span> |
| <span class="source-line-no">1884</span><span id="line-1884"> */</span> |
| <span class="source-line-no">1885</span><span id="line-1885"> public void populateRegionStatesFromMeta(@NonNull final RegionInfo regionInfo)</span> |
| <span class="source-line-no">1886</span><span id="line-1886"> throws IOException {</span> |
| <span class="source-line-no">1887</span><span id="line-1887"> final String regionEncodedName = RegionInfo.DEFAULT_REPLICA_ID == regionInfo.getReplicaId()</span> |
| <span class="source-line-no">1888</span><span id="line-1888"> ? regionInfo.getEncodedName()</span> |
| <span class="source-line-no">1889</span><span id="line-1889"> : RegionInfoBuilder.newBuilder(regionInfo).setReplicaId(RegionInfo.DEFAULT_REPLICA_ID).build()</span> |
| <span class="source-line-no">1890</span><span id="line-1890"> .getEncodedName();</span> |
| <span class="source-line-no">1891</span><span id="line-1891"> populateRegionStatesFromMeta(regionEncodedName);</span> |
| <span class="source-line-no">1892</span><span id="line-1892"> }</span> |
| <span class="source-line-no">1893</span><span id="line-1893"></span> |
| <span class="source-line-no">1894</span><span id="line-1894"> /**</span> |
| <span class="source-line-no">1895</span><span id="line-1895"> * Attempt to load {@code regionEncodedName} from META, adding any results to the</span> |
| <span class="source-line-no">1896</span><span id="line-1896"> * {@link #regionStateStore}. Is NOT aware of replica regions.</span> |
| <span class="source-line-no">1897</span><span id="line-1897"> * @param regionEncodedName encoded name for the region to be loaded from META.</span> |
| <span class="source-line-no">1898</span><span id="line-1898"> * @throws IOException If some error occurs while querying META or parsing results.</span> |
| <span class="source-line-no">1899</span><span id="line-1899"> */</span> |
| <span class="source-line-no">1900</span><span id="line-1900"> public void populateRegionStatesFromMeta(@NonNull String regionEncodedName) throws IOException {</span> |
| <span class="source-line-no">1901</span><span id="line-1901"> final RegionMetaLoadingVisitor visitor = new RegionMetaLoadingVisitor();</span> |
| <span class="source-line-no">1902</span><span id="line-1902"> regionStateStore.visitMetaForRegion(regionEncodedName, visitor);</span> |
| <span class="source-line-no">1903</span><span id="line-1903"> }</span> |
| <span class="source-line-no">1904</span><span id="line-1904"></span> |
| <span class="source-line-no">1905</span><span id="line-1905"> private void loadMeta() throws IOException {</span> |
| <span class="source-line-no">1906</span><span id="line-1906"> // TODO: use a thread pool</span> |
| <span class="source-line-no">1907</span><span id="line-1907"> regionStateStore.visitMeta(new RegionMetaLoadingVisitor());</span> |
| <span class="source-line-no">1908</span><span id="line-1908"> }</span> |
| <span class="source-line-no">1909</span><span id="line-1909"></span> |
| <span class="source-line-no">1910</span><span id="line-1910"> /**</span> |
| <span class="source-line-no">1911</span><span id="line-1911"> * Used to check if the meta loading is done.</span> |
| <span class="source-line-no">1912</span><span id="line-1912"> * <p/></span> |
| <span class="source-line-no">1913</span><span id="line-1913"> * if not we throw PleaseHoldException since we are rebuilding the RegionStates</span> |
| <span class="source-line-no">1914</span><span id="line-1914"> * @param hri region to check if it is already rebuilt</span> |
| <span class="source-line-no">1915</span><span id="line-1915"> * @throws PleaseHoldException if meta has not been loaded yet</span> |
| <span class="source-line-no">1916</span><span id="line-1916"> */</span> |
| <span class="source-line-no">1917</span><span id="line-1917"> private void checkMetaLoaded(RegionInfo hri) throws PleaseHoldException {</span> |
| <span class="source-line-no">1918</span><span id="line-1918"> if (!isRunning()) {</span> |
| <span class="source-line-no">1919</span><span id="line-1919"> throw new PleaseHoldException("AssignmentManager not running");</span> |
| <span class="source-line-no">1920</span><span id="line-1920"> }</span> |
| <span class="source-line-no">1921</span><span id="line-1921"> boolean meta = isMetaRegion(hri);</span> |
| <span class="source-line-no">1922</span><span id="line-1922"> boolean metaLoaded = isMetaLoaded();</span> |
| <span class="source-line-no">1923</span><span id="line-1923"> if (!meta && !metaLoaded) {</span> |
| <span class="source-line-no">1924</span><span id="line-1924"> throw new PleaseHoldException(</span> |
| <span class="source-line-no">1925</span><span id="line-1925"> "Master not fully online; hbase:meta=" + meta + ", metaLoaded=" + metaLoaded);</span> |
| <span class="source-line-no">1926</span><span id="line-1926"> }</span> |
| <span class="source-line-no">1927</span><span id="line-1927"> }</span> |
| <span class="source-line-no">1928</span><span id="line-1928"></span> |
| <span class="source-line-no">1929</span><span id="line-1929"> // ============================================================================================</span> |
| <span class="source-line-no">1930</span><span id="line-1930"> // TODO: Metrics</span> |
| <span class="source-line-no">1931</span><span id="line-1931"> // ============================================================================================</span> |
| <span class="source-line-no">1932</span><span id="line-1932"> public int getNumRegionsOpened() {</span> |
| <span class="source-line-no">1933</span><span id="line-1933"> // TODO: Used by TestRegionPlacement.java and assume monotonically increasing value</span> |
| <span class="source-line-no">1934</span><span id="line-1934"> return 0;</span> |
| <span class="source-line-no">1935</span><span id="line-1935"> }</span> |
| <span class="source-line-no">1936</span><span id="line-1936"></span> |
| <span class="source-line-no">1937</span><span id="line-1937"> /**</span> |
| <span class="source-line-no">1938</span><span id="line-1938"> * Usually run by the Master in reaction to server crash during normal processing. Can also be</span> |
| <span class="source-line-no">1939</span><span id="line-1939"> * invoked via external RPC to effect repair; in the latter case, the 'force' flag is set so we</span> |
| <span class="source-line-no">1940</span><span id="line-1940"> * push through the SCP though context may indicate already-running-SCP (An old SCP may have</span> |
| <span class="source-line-no">1941</span><span id="line-1941"> * exited abnormally, or damaged cluster may still have references in hbase:meta to 'Unknown</span> |
| <span class="source-line-no">1942</span><span id="line-1942"> * Servers' -- servers that are not online or in dead servers list, etc.)</span> |
| <span class="source-line-no">1943</span><span id="line-1943"> * @param force Set if the request came in externally over RPC (via hbck2). Force means run the</span> |
| <span class="source-line-no">1944</span><span id="line-1944"> * SCP even if it seems as though there might be an outstanding SCP running.</span> |
| <span class="source-line-no">1945</span><span id="line-1945"> * @return pid of scheduled SCP or {@link Procedure#NO_PROC_ID} if none scheduled.</span> |
| <span class="source-line-no">1946</span><span id="line-1946"> */</span> |
| <span class="source-line-no">1947</span><span id="line-1947"> public long submitServerCrash(ServerName serverName, boolean shouldSplitWal, boolean force) {</span> |
| <span class="source-line-no">1948</span><span id="line-1948"> // May be an 'Unknown Server' so handle case where serverNode is null.</span> |
| <span class="source-line-no">1949</span><span id="line-1949"> ServerStateNode serverNode = regionStates.getServerNode(serverName);</span> |
| <span class="source-line-no">1950</span><span id="line-1950"> // Remove the in-memory rsReports result</span> |
| <span class="source-line-no">1951</span><span id="line-1951"> synchronized (rsReports) {</span> |
| <span class="source-line-no">1952</span><span id="line-1952"> rsReports.remove(serverName);</span> |
| <span class="source-line-no">1953</span><span id="line-1953"> }</span> |
| <span class="source-line-no">1954</span><span id="line-1954"> if (serverNode == null) {</span> |
| <span class="source-line-no">1955</span><span id="line-1955"> if (force) {</span> |
| <span class="source-line-no">1956</span><span id="line-1956"> LOG.info("Force adding ServerCrashProcedure for {} when server node is null", serverName);</span> |
| <span class="source-line-no">1957</span><span id="line-1957"> } else {</span> |
| <span class="source-line-no">1958</span><span id="line-1958"> // for normal case, do not schedule SCP if ServerStateNode is null</span> |
| <span class="source-line-no">1959</span><span id="line-1959"> LOG.warn("Skip adding ServerCrashProcedure for {} because server node is null", serverName);</span> |
| <span class="source-line-no">1960</span><span id="line-1960"> return Procedure.NO_PROC_ID;</span> |
| <span class="source-line-no">1961</span><span id="line-1961"> }</span> |
| <span class="source-line-no">1962</span><span id="line-1962"> }</span> |
| <span class="source-line-no">1963</span><span id="line-1963"></span> |
| <span class="source-line-no">1964</span><span id="line-1964"> ProcedureExecutor<MasterProcedureEnv> procExec = this.master.getMasterProcedureExecutor();</span> |
| <span class="source-line-no">1965</span><span id="line-1965"> // We hold the write lock here for fencing on reportRegionStateTransition. Once we set the</span> |
| <span class="source-line-no">1966</span><span id="line-1966"> // server state to CRASHED, we will no longer accept the reportRegionStateTransition call from</span> |
| <span class="source-line-no">1967</span><span id="line-1967"> // this server. This is used to simplify the implementation for TRSP and SCP, where we can make</span> |
| <span class="source-line-no">1968</span><span id="line-1968"> // sure that, the region list fetched by SCP will not be changed any more.</span> |
| <span class="source-line-no">1969</span><span id="line-1969"> if (serverNode != null) {</span> |
| <span class="source-line-no">1970</span><span id="line-1970"> serverNode.writeLock().lock();</span> |
| <span class="source-line-no">1971</span><span id="line-1971"> }</span> |
| <span class="source-line-no">1972</span><span id="line-1972"> try {</span> |
| <span class="source-line-no">1973</span><span id="line-1973"></span> |
| <span class="source-line-no">1974</span><span id="line-1974"> boolean carryingMeta = isCarryingMeta(serverName);</span> |
| <span class="source-line-no">1975</span><span id="line-1975"> if (serverNode != null && !serverNode.isInState(ServerState.ONLINE)) {</span> |
| <span class="source-line-no">1976</span><span id="line-1976"> if (force) {</span> |
| <span class="source-line-no">1977</span><span id="line-1977"> LOG.info("Force adding ServerCrashProcedure for {} (meta={}) when state is not {}",</span> |
| <span class="source-line-no">1978</span><span id="line-1978"> serverNode, carryingMeta, ServerState.ONLINE);</span> |
| <span class="source-line-no">1979</span><span id="line-1979"> } else {</span> |
| <span class="source-line-no">1980</span><span id="line-1980"> LOG.info("Skip adding ServerCrashProcedure for {} (meta={}) when state is not {}",</span> |
| <span class="source-line-no">1981</span><span id="line-1981"> serverNode, carryingMeta, ServerState.ONLINE);</span> |
| <span class="source-line-no">1982</span><span id="line-1982"> return Procedure.NO_PROC_ID;</span> |
| <span class="source-line-no">1983</span><span id="line-1983"> }</span> |
| <span class="source-line-no">1984</span><span id="line-1984"> }</span> |
| <span class="source-line-no">1985</span><span id="line-1985"> MasterProcedureEnv mpe = procExec.getEnvironment();</span> |
| <span class="source-line-no">1986</span><span id="line-1986"> // If serverNode == null, then 'Unknown Server'. Schedule HBCKSCP instead.</span> |
| <span class="source-line-no">1987</span><span id="line-1987"> // HBCKSCP scours Master in-memory state AND hbase:meta for references to</span> |
| <span class="source-line-no">1988</span><span id="line-1988"> // serverName just-in-case. An SCP that is scheduled when the server is</span> |
| <span class="source-line-no">1989</span><span id="line-1989"> // 'Unknown' probably originated externally with HBCK2 fix-it tool.</span> |
| <span class="source-line-no">1990</span><span id="line-1990"> ServerState oldState = null;</span> |
| <span class="source-line-no">1991</span><span id="line-1991"> if (serverNode != null) {</span> |
| <span class="source-line-no">1992</span><span id="line-1992"> oldState = serverNode.getState();</span> |
| <span class="source-line-no">1993</span><span id="line-1993"> serverNode.setState(ServerState.CRASHED);</span> |
| <span class="source-line-no">1994</span><span id="line-1994"> }</span> |
| <span class="source-line-no">1995</span><span id="line-1995"> ServerCrashProcedure scp = force</span> |
| <span class="source-line-no">1996</span><span id="line-1996"> ? new HBCKServerCrashProcedure(mpe, serverName, shouldSplitWal, carryingMeta)</span> |
| <span class="source-line-no">1997</span><span id="line-1997"> : new ServerCrashProcedure(mpe, serverName, shouldSplitWal, carryingMeta);</span> |
| <span class="source-line-no">1998</span><span id="line-1998"> long pid = procExec.submitProcedure(scp);</span> |
| <span class="source-line-no">1999</span><span id="line-1999"> LOG.info("Scheduled ServerCrashProcedure pid={} for {} (carryingMeta={}){}.", pid, serverName,</span> |
| <span class="source-line-no">2000</span><span id="line-2000"> carryingMeta,</span> |
| <span class="source-line-no">2001</span><span id="line-2001"> serverNode == null ? "" : " " + serverNode.toString() + ", oldState=" + oldState);</span> |
| <span class="source-line-no">2002</span><span id="line-2002"> return pid;</span> |
| <span class="source-line-no">2003</span><span id="line-2003"> } finally {</span> |
| <span class="source-line-no">2004</span><span id="line-2004"> if (serverNode != null) {</span> |
| <span class="source-line-no">2005</span><span id="line-2005"> serverNode.writeLock().unlock();</span> |
| <span class="source-line-no">2006</span><span id="line-2006"> }</span> |
| <span class="source-line-no">2007</span><span id="line-2007"> }</span> |
| <span class="source-line-no">2008</span><span id="line-2008"> }</span> |
| <span class="source-line-no">2009</span><span id="line-2009"></span> |
| <span class="source-line-no">2010</span><span id="line-2010"> public void offlineRegion(final RegionInfo regionInfo) {</span> |
| <span class="source-line-no">2011</span><span id="line-2011"> // TODO used by MasterRpcServices. Calls offline() on the region's state node; no-op if regionStates has none.</span> |
| <span class="source-line-no">2012</span><span id="line-2012"> RegionStateNode node = regionStates.getRegionStateNode(regionInfo);</span> |
| <span class="source-line-no">2013</span><span id="line-2013"> if (node != null) {</span> |
| <span class="source-line-no">2014</span><span id="line-2014"> node.offline();</span> |
| <span class="source-line-no">2015</span><span id="line-2015"> }</span> |
| <span class="source-line-no">2016</span><span id="line-2016"> }</span> |
| <span class="source-line-no">2017</span><span id="line-2017"></span> |
| <span class="source-line-no">2018</span><span id="line-2018"> public void onlineRegion(final RegionInfo regionInfo, final ServerName serverName) {</span> |
| <span class="source-line-no">2019</span><span id="line-2019"> // TODO used by TestSplitTransactionOnCluster.java. Currently a no-op: both arguments are unused.</span> |
| <span class="source-line-no">2020</span><span id="line-2020"> }</span> |
| <span class="source-line-no">2021</span><span id="line-2021"></span> |
| <span class="source-line-no">2022</span><span id="line-2022"> /** Delegates to {@code regionStates.getSnapShotOfAssignment(regions)}. */ public Map<ServerName, List<RegionInfo>></span> |
| <span class="source-line-no">2023</span><span id="line-2023"> getSnapShotOfAssignment(final Collection<RegionInfo> regions) {</span> |
| <span class="source-line-no">2024</span><span id="line-2024"> return regionStates.getSnapShotOfAssignment(regions);</span> |
| <span class="source-line-no">2025</span><span id="line-2025"> }</span> |
| <span class="source-line-no">2026</span><span id="line-2026"></span> |
| <span class="source-line-no">2027</span><span id="line-2027"> // ============================================================================================</span> |
| <span class="source-line-no">2028</span><span id="line-2028"> // TODO: UTILS/HELPERS?</span> |
| <span class="source-line-no">2029</span><span id="line-2029"> // ============================================================================================</span> |
| <span class="source-line-no">2030</span><span id="line-2030"> /**</span> |
| <span class="source-line-no">2031</span><span id="line-2031"> * Lets a client (through the master) check whether every region has picked up a schema update.</span> |
| <span class="source-line-no">2032</span><span id="line-2032"> * @return Pair of (regions neither opened nor split, all regions); (0, 0) for a disabled table</span> |
| <span class="source-line-no">2033</span><span id="line-2033"> */</span> |
| <span class="source-line-no">2034</span><span id="line-2034"> public Pair<Integer, Integer> getReopenStatus(TableName tableName) {</span> |
| <span class="source-line-no">2035</span><span id="line-2035"> if (isTableDisabled(tableName)) {</span> |
| <span class="source-line-no">2036</span><span id="line-2036"> return new Pair<>(0, 0);</span> |
| <span class="source-line-no">2037</span><span id="line-2037"> }</span> |
| <span class="source-line-no">2038</span><span id="line-2038"></span> |
| <span class="source-line-no">2039</span><span id="line-2039"> final List<RegionState> tableStates = regionStates.getTableRegionStates(tableName);</span> |
| <span class="source-line-no">2040</span><span id="line-2040"> int pending = 0;</span> |
| <span class="source-line-no">2041</span><span id="line-2041"> for (RegionState current : tableStates) {</span> |
| <span class="source-line-no">2042</span><span id="line-2042"> if (!(current.isOpened() || current.isSplit())) {</span> |
| <span class="source-line-no">2043</span><span id="line-2043"> pending++;</span> |
| <span class="source-line-no">2044</span><span id="line-2044"> }</span> |
| <span class="source-line-no">2045</span><span id="line-2045"> }</span> |
| <span class="source-line-no">2046</span><span id="line-2046"> return new Pair<>(pending, tableStates.size());</span> |
| <span class="source-line-no">2047</span><span id="line-2047"> }</span> |
| <span class="source-line-no">2048</span><span id="line-2048"></span> |
| <span class="source-line-no">2049</span><span id="line-2049"> // ============================================================================================</span> |
| <span class="source-line-no">2050</span><span id="line-2050"> // TODO: Region State In Transition</span> |
| <span class="source-line-no">2051</span><span id="line-2051"> // ============================================================================================</span> |
| <span class="source-line-no">2052</span><span id="line-2052"> /** Delegates to {@code regionStates.hasRegionsInTransition()}. */ public boolean hasRegionsInTransition() {</span> |
| <span class="source-line-no">2053</span><span id="line-2053"> return regionStates.hasRegionsInTransition();</span> |
| <span class="source-line-no">2054</span><span id="line-2054"> }</span> |
| <span class="source-line-no">2055</span><span id="line-2055"></span> |
| <span class="source-line-no">2056</span><span id="line-2056"> /** Delegates to {@code regionStates.getRegionsInTransition()}. */ public List<RegionStateNode> getRegionsInTransition() {</span> |
| <span class="source-line-no">2057</span><span id="line-2057"> return regionStates.getRegionsInTransition();</span> |
| <span class="source-line-no">2058</span><span id="line-2058"> }</span> |
| <span class="source-line-no">2059</span><span id="line-2059"></span> |
| <span class="source-line-no">2060</span><span id="line-2060"> /** Delegates to {@code regionStates.getAssignedRegions()}. */ public List<RegionInfo> getAssignedRegions() {</span> |
| <span class="source-line-no">2061</span><span id="line-2061"> return regionStates.getAssignedRegions();</span> |
| <span class="source-line-no">2062</span><span id="line-2062"> }</span> |
| <span class="source-line-no">2063</span><span id="line-2063"></span> |
| <span class="source-line-no">2064</span><span id="line-2064"> /**</span> |
| <span class="source-line-no">2065</span><span id="line-2065"> * Look up the cached {@link RegionInfo} for a region name given as a {@code byte[]}; may be null.</span> |
| <span class="source-line-no">2066</span><span id="line-2066"> */</span> |
| <span class="source-line-no">2067</span><span id="line-2067"> public RegionInfo getRegionInfo(final byte[] regionName) {</span> |
| <span class="source-line-no">2068</span><span id="line-2068"> final RegionStateNode node = regionStates.getRegionStateNodeFromName(regionName);</span> |
| <span class="source-line-no">2069</span><span id="line-2069"> return node == null ? null : node.getRegionInfo();</span> |
| <span class="source-line-no">2070</span><span id="line-2070"> }</span> |
| <span class="source-line-no">2071</span><span id="line-2071"></span> |
| <span class="source-line-no">2072</span><span id="line-2072"> /**</span> |
| <span class="source-line-no">2073</span><span id="line-2073"> * Look up the cached {@link RegionInfo} for an encoded region name ({@code String}); may be null.</span> |
| <span class="source-line-no">2074</span><span id="line-2074"> */</span> |
| <span class="source-line-no">2075</span><span id="line-2075"> public RegionInfo getRegionInfo(final String encodedRegionName) {</span> |
| <span class="source-line-no">2076</span><span id="line-2076"> final RegionStateNode node =</span> |
| <span class="source-line-no">2077</span><span id="line-2077"> regionStates.getRegionStateNodeFromEncodedRegionName(encodedRegionName);</span> |
| <span class="source-line-no">2078</span><span id="line-2078"> return node == null ? null : node.getRegionInfo();</span> |
| <span class="source-line-no">2079</span><span id="line-2079"> }</span> |
| <span class="source-line-no">2080</span><span id="line-2080"></span> |
| <span class="source-line-no">2081</span><span id="line-2081"> // ============================================================================================</span> |
| <span class="source-line-no">2082</span><span id="line-2082"> // Expected states on region state transition.</span> |
| <span class="source-line-no">2083</span><span id="line-2083"> // Notice that there are no expected states for transiting to OPENING state, this is because of SCP.</span> |
| <span class="source-line-no">2084</span><span id="line-2084"> // See the comments in regionOpening method for more details.</span> |
| <span class="source-line-no">2085</span><span id="line-2085"> // ============================================================================================</span> |
| <span class="source-line-no">2086</span><span id="line-2086"> private static final State[] STATES_EXPECTED_ON_OPEN = { State.OPENING, // Normal case</span> |
| <span class="source-line-no">2087</span><span id="line-2087"> State.OPEN // Retrying</span> |
| <span class="source-line-no">2088</span><span id="line-2088"> };</span> |
| <span class="source-line-no">2089</span><span id="line-2089"></span> |
| <span class="source-line-no">2090</span><span id="line-2090"> private static final State[] STATES_EXPECTED_ON_CLOSING = { State.OPEN, // Normal case</span> |
| <span class="source-line-no">2091</span><span id="line-2091"> State.CLOSING, // Retrying</span> |
| <span class="source-line-no">2092</span><span id="line-2092"> State.SPLITTING, // Offline the split parent</span> |
| <span class="source-line-no">2093</span><span id="line-2093"> State.MERGING // Offline the merge parents</span> |
| <span class="source-line-no">2094</span><span id="line-2094"> };</span> |
| <span class="source-line-no">2095</span><span id="line-2095"></span> |
| <span class="source-line-no">2096</span><span id="line-2096"> private static final State[] STATES_EXPECTED_ON_CLOSED = { State.CLOSING, // Normal case</span> |
| <span class="source-line-no">2097</span><span id="line-2097"> State.CLOSED // Retrying</span> |
| <span class="source-line-no">2098</span><span id="line-2098"> };</span> |
| <span class="source-line-no">2099</span><span id="line-2099"></span> |
| <span class="source-line-no">2100</span><span id="line-2100"> // Used for manually scheduled region assign; other states can be added later if we find other</span> |
| <span class="source-line-no">2101</span><span id="line-2101"> // usages.</span> |
| <span class="source-line-no">2102</span><span id="line-2102"> private static final State[] STATES_EXPECTED_ON_ASSIGN = { State.CLOSED, State.OFFLINE };</span> |
| <span class="source-line-no">2103</span><span id="line-2103"></span> |
| <span class="source-line-no">2104</span><span id="line-2104"> // We only allow unassigning or moving a region which is in OPEN state.</span> |
| <span class="source-line-no">2105</span><span id="line-2105"> private static final State[] STATES_EXPECTED_ON_UNASSIGN_OR_MOVE = { State.OPEN };</span> |
| <span class="source-line-no">2106</span><span id="line-2106"></span> |
| <span class="source-line-no">2107</span><span id="line-2107"> // ============================================================================================</span> |
| <span class="source-line-no">2108</span><span id="line-2108"> // Region Status update</span> |
| <span class="source-line-no">2109</span><span id="line-2109"> // Should only be called in TransitRegionStateProcedure(and related procedures), as the locking</span> |
| <span class="source-line-no">2110</span><span id="line-2110"> // and pre-assumptions are very tricky.</span> |
| <span class="source-line-no">2111</span><span id="line-2111"> // ============================================================================================</span> |
| <span class="source-line-no">2112</span><span id="line-2112"> private CompletableFuture<Void> transitStateAndUpdate(RegionStateNode regionNode,</span> |
| <span class="source-line-no">2113</span><span id="line-2113"> RegionState.State newState, RegionState.State... expectedStates) {</span> |
| <span class="source-line-no">2114</span><span id="line-2114"> RegionState.State state = regionNode.getState();</span> |
| <span class="source-line-no">2115</span><span id="line-2115"> try {</span> |
| <span class="source-line-no">2116</span><span id="line-2116"> regionNode.transitionState(newState, expectedStates);</span> |
| <span class="source-line-no">2117</span><span id="line-2117"> } catch (UnexpectedStateException e) {</span> |
| <span class="source-line-no">2118</span><span id="line-2118"> return FutureUtils.failedFuture(e);</span> |
| <span class="source-line-no">2119</span><span id="line-2119"> }</span> |
| <span class="source-line-no">2120</span><span id="line-2120"> CompletableFuture<Void> future = regionStateStore.updateRegionLocation(regionNode);</span> |
| <span class="source-line-no">2121</span><span id="line-2121"> FutureUtils.addListener(future, (r, e) -> {</span> |
| <span class="source-line-no">2122</span><span id="line-2122"> if (e != null) {</span> |
| <span class="source-line-no">2123</span><span id="line-2123"> // revert</span> |
| <span class="source-line-no">2124</span><span id="line-2124"> regionNode.setState(state);</span> |
| <span class="source-line-no">2125</span><span id="line-2125"> }</span> |
| <span class="source-line-no">2126</span><span id="line-2126"> });</span> |
| <span class="source-line-no">2127</span><span id="line-2127"> return future;</span> |
| <span class="source-line-no">2128</span><span id="line-2128"> }</span> |
| <span class="source-line-no">2129</span><span id="line-2129"></span> |
| <span class="source-line-no">2130</span><span id="line-2130"> // Moves the region to OPENING; should be called within the synchronized block of RegionStateNode</span> |
| <span class="source-line-no">2131</span><span id="line-2131"> CompletableFuture<Void> regionOpening(RegionStateNode regionNode) {</span> |
| <span class="source-line-no">2132</span><span id="line-2132"> // As in SCP, for performance reasons, there is no TRSP attached to this region, we will not</span> |
| <span class="source-line-no">2133</span><span id="line-2133"> // update the region state, which means that the region could be in any state when we want to</span> |
| <span class="source-line-no">2134</span><span id="line-2134"> // assign it after a RS crash. So here we do not pass the expectedStates parameter.</span> |
| <span class="source-line-no">2135</span><span id="line-2135"> return transitStateAndUpdate(regionNode, State.OPENING).thenAccept(r -> {</span> |
| <span class="source-line-no">2136</span><span id="line-2136"> ServerStateNode serverNode = regionStates.getServerNode(regionNode.getRegionLocation());</span> |
| <span class="source-line-no">2137</span><span id="line-2137"> // Here the server node could be null. For example, we want to assign the region to a given</span> |
| <span class="source-line-no">2138</span><span id="line-2138"> // region server and it crashes, and it is the region server which holds hbase:meta, then the</span> |
| <span class="source-line-no">2139</span><span id="line-2139"> // above transitStateAndUpdate call will never succeed until we finish the SCP for it. But</span> |
| <span class="source-line-no">2140</span><span id="line-2140"> // after the SCP finishes, the server node will be removed, so when we arrive here, the</span> |
| <span class="source-line-no">2141</span><span id="line-2141"> // server node will be null. This is not a big problem if we skip adding it, as later we</span> |
| <span class="source-line-no">2142</span><span id="line-2142"> // will fail to execute the remote procedure on the region server and then try to assign to</span> |
| <span class="source-line-no">2143</span><span id="line-2143"> // another region server.</span> |
| <span class="source-line-no">2144</span><span id="line-2144"> // Hence the null check below.</span> |
| <span class="source-line-no">2145</span><span id="line-2145"> if (serverNode != null) {</span> |
| <span class="source-line-no">2146</span><span id="line-2146"> serverNode.addRegion(regionNode);</span> |
| <span class="source-line-no">2147</span><span id="line-2147"> }</span> |
| <span class="source-line-no">2148</span><span id="line-2148"> // update the operation count metrics</span> |
| <span class="source-line-no">2149</span><span id="line-2149"> metrics.incrementOperationCounter();</span> |
| <span class="source-line-no">2150</span><span id="line-2150"> });</span> |
| <span class="source-line-no">2151</span><span id="line-2151"> }</span> |
| <span class="source-line-no">2152</span><span id="line-2152"></span> |
| <span class="source-line-no">2153</span><span id="line-2153"> // should be called under the RegionStateNode lock</span> |
| <span class="source-line-no">2154</span><span id="line-2154"> // 'giveUp' == false: we will try to open again, so only remove the region from its server.</span> |
| <span class="source-line-no">2155</span><span id="line-2155"> // 'giveUp' == true: set FAILED_OPEN, clear the location and persist that into hbase:meta.</span> |
| <span class="source-line-no">2156</span><span id="line-2156"> CompletableFuture<Void> regionFailedOpen(RegionStateNode regionNode, boolean giveUp) {</span> |
| <span class="source-line-no">2157</span><span id="line-2157"> RegionState.State state = regionNode.getState();</span> |
| <span class="source-line-no">2158</span><span id="line-2158"> ServerName regionLocation = regionNode.getRegionLocation();</span> |
| <span class="source-line-no">2159</span><span id="line-2159"> if (!giveUp) {</span> |
| <span class="source-line-no">2160</span><span id="line-2160"> if (regionLocation != null) {</span> |
| <span class="source-line-no">2161</span><span id="line-2161"> regionStates.removeRegionFromServer(regionLocation, regionNode);</span> |
| <span class="source-line-no">2162</span><span id="line-2162"> }</span> |
| <span class="source-line-no">2163</span><span id="line-2163"> return CompletableFuture.completedFuture(null);</span> |
| <span class="source-line-no">2164</span><span id="line-2164"> }</span> |
| <span class="source-line-no">2165</span><span id="line-2165"> regionNode.setState(State.FAILED_OPEN);</span> |
| <span class="source-line-no">2166</span><span id="line-2166"> regionNode.setRegionLocation(null);</span> |
| <span class="source-line-no">2167</span><span id="line-2167"> CompletableFuture<Void> future = regionStateStore.updateRegionLocation(regionNode);</span> |
| <span class="source-line-no">2168</span><span id="line-2168"> FutureUtils.addListener(future, (r, e) -> {</span> |
| <span class="source-line-no">2169</span><span id="line-2169"> if (e == null) {</span> |
| <span class="source-line-no">2170</span><span id="line-2170"> if (regionLocation != null) {</span> |
| <span class="source-line-no">2171</span><span id="line-2171"> regionStates.removeRegionFromServer(regionLocation, regionNode);</span> |
| <span class="source-line-no">2172</span><span id="line-2172"> }</span> |
| <span class="source-line-no">2173</span><span id="line-2173"> } else {</span> |
| <span class="source-line-no">2174</span><span id="line-2174"> // revert the in-memory state and location, as the update failed</span> |
| <span class="source-line-no">2175</span><span id="line-2175"> regionNode.setState(state);</span> |
| <span class="source-line-no">2176</span><span id="line-2176"> regionNode.setRegionLocation(regionLocation);</span> |
| <span class="source-line-no">2177</span><span id="line-2177"> }</span> |
| <span class="source-line-no">2178</span><span id="line-2178"> });</span> |
| <span class="source-line-no">2179</span><span id="line-2179"> return future;</span> |
| <span class="source-line-no">2180</span><span id="line-2180"> }</span> |
| <span class="source-line-no">2181</span><span id="line-2181"></span> |
| <span class="source-line-no">2182</span><span id="line-2182"> // Moves the region to CLOSING; should be called under the RegionStateNode lock</span> |
| <span class="source-line-no">2183</span><span id="line-2183"> CompletableFuture<Void> regionClosing(RegionStateNode regionNode) {</span> |
| <span class="source-line-no">2184</span><span id="line-2184"> return transitStateAndUpdate(regionNode, State.CLOSING, STATES_EXPECTED_ON_CLOSING)</span> |
| <span class="source-line-no">2185</span><span id="line-2185"> .thenAccept(r -> {</span> |
| <span class="source-line-no">2186</span><span id="line-2186"> RegionInfo hri = regionNode.getRegionInfo();</span> |
| <span class="source-line-no">2187</span><span id="line-2187"> // Flag meta as not assigned right away, so people trying to create/edit tables will wait</span> |
| <span class="source-line-no">2188</span><span id="line-2188"> if (isMetaRegion(hri)) {</span> |
| <span class="source-line-no">2189</span><span id="line-2189"> setMetaAssigned(hri, false);</span> |
| <span class="source-line-no">2190</span><span id="line-2190"> }</span> |
| <span class="source-line-no">2191</span><span id="line-2191"> // update the operation count metrics</span> |
| <span class="source-line-no">2192</span><span id="line-2192"> metrics.incrementOperationCounter();</span> |
| <span class="source-line-no">2193</span><span id="line-2193"> });</span> |
| <span class="source-line-no">2194</span><span id="line-2194"> }</span> |
| <span class="source-line-no">2195</span><span id="line-2195"></span> |
| <span class="source-line-no">2196</span><span id="line-2196"> // for open and close, they will first be persisted to the procedure store in</span> |
| <span class="source-line-no">2197</span><span id="line-2197"> // RegionRemoteProcedureBase. So here we will first change the in memory state as it is considered</span> |
| <span class="source-line-no">2198</span><span id="line-2198"> // as succeeded if the persistence to procedure store is succeeded, and then when the</span> |
| <span class="source-line-no">2199</span><span id="line-2199"> // RegionRemoteProcedureBase is woken up, we will persist the RegionStateNode to hbase:meta.</span> |
| <span class="source-line-no">2200</span><span id="line-2200"></span> |
| <span class="source-line-no">2201</span><span id="line-2201"> // In-memory transition to OPEN only; should be called under the RegionStateNode lock</span> |
| <span class="source-line-no">2202</span><span id="line-2202"> void regionOpenedWithoutPersistingToMeta(RegionStateNode regionNode)</span> |
| <span class="source-line-no">2203</span><span id="line-2203"> throws UnexpectedStateException {</span> |
| <span class="source-line-no">2204</span><span id="line-2204"> regionNode.transitionState(State.OPEN, STATES_EXPECTED_ON_OPEN);</span> |
| <span class="source-line-no">2205</span><span id="line-2205"> RegionInfo regionInfo = regionNode.getRegionInfo();</span> |
| <span class="source-line-no">2206</span><span id="line-2206"> regionStates.addRegionToServer(regionNode);</span> |
| <span class="source-line-no">2207</span><span id="line-2207"> regionStates.removeFromFailedOpen(regionInfo);</span> |
| <span class="source-line-no">2208</span><span id="line-2208"> }</span> |
| <span class="source-line-no">2209</span><span id="line-2209"></span> |
| <span class="source-line-no">2210</span><span id="line-2210"> // In-memory transition to CLOSED only; should be called under the RegionStateNode lock</span> |
| <span class="source-line-no">2211</span><span id="line-2211"> void regionClosedWithoutPersistingToMeta(RegionStateNode regionNode)</span> |
| <span class="source-line-no">2212</span><span id="line-2212"> throws UnexpectedStateException {</span> |
| <span class="source-line-no">2213</span><span id="line-2213"> ServerName regionLocation = regionNode.getRegionLocation();</span> |
| <span class="source-line-no">2214</span><span id="line-2214"> regionNode.transitionState(State.CLOSED, STATES_EXPECTED_ON_CLOSED);</span> |
| <span class="source-line-no">2215</span><span id="line-2215"> regionNode.setRegionLocation(null);</span> |
| <span class="source-line-no">2216</span><span id="line-2216"> if (regionLocation != null) {</span> |
| <span class="source-line-no">2217</span><span id="line-2217"> regionNode.setLastHost(regionLocation);</span> |
| <span class="source-line-no">2218</span><span id="line-2218"> regionStates.removeRegionFromServer(regionLocation, regionNode);</span> |
| <span class="source-line-no">2219</span><span id="line-2219"> }</span> |
| <span class="source-line-no">2220</span><span id="line-2220"> }</span> |
| <span class="source-line-no">2221</span><span id="line-2221"></span> |
| <span class="source-line-no">2222</span><span id="line-2222"> // Persists the region node via regionStateStore; should be called under the RegionStateNode lock</span> |
| <span class="source-line-no">2223</span><span id="line-2223"> CompletableFuture<Void> persistToMeta(RegionStateNode regionNode) {</span> |
| <span class="source-line-no">2224</span><span id="line-2224"> return regionStateStore.updateRegionLocation(regionNode).thenAccept(r -> {</span> |
| <span class="source-line-no">2225</span><span id="line-2225"> RegionInfo regionInfo = regionNode.getRegionInfo();</span> |
| <span class="source-line-no">2226</span><span id="line-2226"> if (isMetaRegion(regionInfo) && regionNode.getState() == State.OPEN) {</span> |
| <span class="source-line-no">2227</span><span id="line-2227"> // Usually we'd set a table ENABLED at this stage but hbase:meta is ALWAYS enabled, it</span> |
| <span class="source-line-no">2228</span><span id="line-2228"> // can't be disabled -- so skip the RPC (besides... enabled is managed by TableStateManager</span> |
| <span class="source-line-no">2229</span><span id="line-2229"> // which is backed by hbase:meta...). Avoid setting ENABLED to avoid having to update state</span> |
| <span class="source-line-no">2230</span><span id="line-2230"> // on the table that contains state.</span> |
| <span class="source-line-no">2231</span><span id="line-2231"> setMetaAssigned(regionInfo, true);</span> |
| <span class="source-line-no">2232</span><span id="line-2232"> }</span> |
| <span class="source-line-no">2233</span><span id="line-2233"> });</span> |
| <span class="source-line-no">2234</span><span id="line-2234"> }</span> |
| <span class="source-line-no">2235</span><span id="line-2235"></span> |
| <span class="source-line-no">2236</span><span id="line-2236"> // should be called under the RegionStateNode lock</span> |
| <span class="source-line-no">2237</span><span id="line-2237"> // for SCP: marks the region ABNORMALLY_CLOSED with no location and persists that state</span> |
| <span class="source-line-no">2238</span><span id="line-2238"> public CompletableFuture<Void> regionClosedAbnormally(RegionStateNode regionNode) {</span> |
| <span class="source-line-no">2239</span><span id="line-2239"> RegionState.State state = regionNode.getState();</span> |
| <span class="source-line-no">2240</span><span id="line-2240"> ServerName regionLocation = regionNode.getRegionLocation();</span> |
| <span class="source-line-no">2241</span><span id="line-2241"> regionNode.setState(State.ABNORMALLY_CLOSED);</span> |
| <span class="source-line-no">2242</span><span id="line-2242"> regionNode.setRegionLocation(null);</span> |
| <span class="source-line-no">2243</span><span id="line-2243"> CompletableFuture<Void> future = regionStateStore.updateRegionLocation(regionNode);</span> |
| <span class="source-line-no">2244</span><span id="line-2244"> FutureUtils.addListener(future, (r, e) -> {</span> |
| <span class="source-line-no">2245</span><span id="line-2245"> if (e == null) {</span> |
| <span class="source-line-no">2246</span><span id="line-2246"> if (regionLocation != null) {</span> |
| <span class="source-line-no">2247</span><span id="line-2247"> regionNode.setLastHost(regionLocation);</span> |
| <span class="source-line-no">2248</span><span id="line-2248"> regionStates.removeRegionFromServer(regionLocation, regionNode);</span> |
| <span class="source-line-no">2249</span><span id="line-2249"> }</span> |
| <span class="source-line-no">2250</span><span id="line-2250"> } else {</span> |
| <span class="source-line-no">2251</span><span id="line-2251"> // revert the in-memory state and location, as the update failed</span> |
| <span class="source-line-no">2252</span><span id="line-2252"> regionNode.setState(state);</span> |
| <span class="source-line-no">2253</span><span id="line-2253"> regionNode.setRegionLocation(regionLocation);</span> |
| <span class="source-line-no">2254</span><span id="line-2254"> }</span> |
| <span class="source-line-no">2255</span><span id="line-2255"> });</span> |
| <span class="source-line-no">2256</span><span id="line-2256"> return future;</span> |
| <span class="source-line-no">2257</span><span id="line-2257"> }</span> |
| <span class="source-line-no">2258</span><span id="line-2258"></span> |
| <span class="source-line-no">2259</span><span id="line-2259"> // ============================================================================================</span> |
| <span class="source-line-no">2260</span><span id="line-2260"> // The above methods can only be called in TransitRegionStateProcedure(and related procedures)</span> |
| <span class="source-line-no">2261</span><span id="line-2261"> // ============================================================================================</span> |
| <span class="source-line-no">2262</span><span id="line-2262"></span> |
| <span class="source-line-no">2263</span><span id="line-2263"> public void markRegionAsSplit(final RegionInfo parent, final ServerName serverName,</span> |
| <span class="source-line-no">2264</span><span id="line-2264"> final RegionInfo daughterA, final RegionInfo daughterB) throws IOException {</span> |
| <span class="source-line-no">2265</span><span id="line-2265"> // Update hbase:meta. Parent will be marked offline and split up in hbase:meta.</span> |
| <span class="source-line-no">2266</span><span id="line-2266"> // The parent stays in regionStates until cleared when removed by CatalogJanitor.</span> |
| <span class="source-line-no">2267</span><span id="line-2267"> // Update its state in regionStates so it shows as offline and split when read</span> |
| <span class="source-line-no">2268</span><span id="line-2268"> // later when figuring out what regions are in a table and what are not: see</span> |
| <span class="source-line-no">2269</span><span id="line-2269"> // regionStates#getRegionsOfTable</span> |
| <span class="source-line-no">2270</span><span id="line-2270"> final RegionStateNode node = regionStates.getOrCreateRegionStateNode(parent);</span> |
| <span class="source-line-no">2271</span><span id="line-2271"> node.setState(State.SPLIT);</span> |
| <span class="source-line-no">2272</span><span id="line-2272"> final RegionStateNode nodeA = regionStates.getOrCreateRegionStateNode(daughterA);</span> |
| <span class="source-line-no">2273</span><span id="line-2273"> nodeA.setState(State.SPLITTING_NEW);</span> |
| <span class="source-line-no">2274</span><span id="line-2274"> final RegionStateNode nodeB = regionStates.getOrCreateRegionStateNode(daughterB);</span> |
| <span class="source-line-no">2275</span><span id="line-2275"> nodeB.setState(State.SPLITTING_NEW);</span> |
| <span class="source-line-no">2276</span><span id="line-2276"></span> |
| <span class="source-line-no">2277</span><span id="line-2277"> TableDescriptor td = master.getTableDescriptors().get(parent.getTable());</span> |
| <span class="source-line-no">2278</span><span id="line-2278"> // TODO: here we just update the parent region info in meta, to set split and offline to true,</span> |
| <span class="source-line-no">2279</span><span id="line-2279"> // without changing the one in the region node. This is a bit confusing but the region info</span> |
| <span class="source-line-no">2280</span><span id="line-2280"> // field in RegionStateNode is not expected to be changed in the current design. Need to find a</span> |
| <span class="source-line-no">2281</span><span id="line-2281"> // possible way to address this problem, or at least add more comments about the trick to</span> |
| <span class="source-line-no">2282</span><span id="line-2282"> // deal with this problem, that when you want to filter out split parent, you need to check both</span> |
| <span class="source-line-no">2283</span><span id="line-2283"> // the RegionState on whether it is split, and also the region info. If one of them matches then</span> |
| <span class="source-line-no">2284</span><span id="line-2284"> // it is a split parent. And usually only one of them can match, as after restart, the region</span> |
| <span class="source-line-no">2285</span><span id="line-2285"> // state will be changed from SPLIT to CLOSED.</span> |
| <span class="source-line-no">2286</span><span id="line-2286"> regionStateStore.splitRegion(parent, daughterA, daughterB, serverName, td);</span> |
| <span class="source-line-no">2287</span><span id="line-2287"> if (shouldAssignFavoredNodes(parent)) {</span> |
| <span class="source-line-no">2288</span><span id="line-2288"> List<ServerName> onlineServers = this.master.getServerManager().getOnlineServersList();</span> |
| <span class="source-line-no">2289</span><span id="line-2289"> getFavoredNodePromoter().generateFavoredNodesForDaughter(onlineServers, parent, daughterA,</span> |
| <span class="source-line-no">2290</span><span id="line-2290"> daughterB);</span> |
| <span class="source-line-no">2291</span><span id="line-2291"> }</span> |
| <span class="source-line-no">2292</span><span id="line-2292"> }</span> |
| <span class="source-line-no">2293</span><span id="line-2293"></span> |
| <span class="source-line-no">2294</span><span id="line-2294"> /**</span> |
| <span class="source-line-no">2295</span><span id="line-2295"> * When called here, the merge has happened. The merged regions have been unassigned and the above</span> |
| <span class="source-line-no">2296</span><span id="line-2296"> * markRegionClosed has been called on each so they have been disassociated from a hosting Server.</span> |
| <span class="source-line-no">2297</span><span id="line-2297"> * The merged region will be open after this call. The merged regions are removed from hbase:meta</span> |
| <span class="source-line-no">2298</span><span id="line-2298"> * below. Later they are deleted from the filesystem by the catalog janitor running against</span> |
| <span class="source-line-no">2299</span><span id="line-2299"> * hbase:meta. It notices when the merged region no longer holds references to the old regions</span> |
| <span class="source-line-no">2300</span><span id="line-2300"> * (References are deleted after a compaction rewrites what the Reference points at but not until</span> |
| <span class="source-line-no">2301</span><span id="line-2301"> * the archiver chore runs, are the References removed).</span> |
| <span class="source-line-no">2302</span><span id="line-2302"> */</span> |
| <span class="source-line-no">2303</span><span id="line-2303"> public void markRegionAsMerged(final RegionInfo child, final ServerName serverName,</span> |
| <span class="source-line-no">2304</span><span id="line-2304"> RegionInfo[] mergeParents) throws IOException {</span> |
| <span class="source-line-no">2305</span><span id="line-2305"> final RegionStateNode node = regionStates.getOrCreateRegionStateNode(child);</span> |
| <span class="source-line-no">2306</span><span id="line-2306"> node.setState(State.MERGED);</span> |
| <span class="source-line-no">2307</span><span id="line-2307"> for (RegionInfo ri : mergeParents) {</span> |
| <span class="source-line-no">2308</span><span id="line-2308"> regionStates.deleteRegion(ri);</span> |
| <span class="source-line-no">2309</span><span id="line-2309"> }</span> |
| <span class="source-line-no">2310</span><span id="line-2310"> TableDescriptor td = master.getTableDescriptors().get(child.getTable());</span> |
| <span class="source-line-no">2311</span><span id="line-2311"> regionStateStore.mergeRegions(child, mergeParents, serverName, td);</span> |
| <span class="source-line-no">2312</span><span id="line-2312"> if (shouldAssignFavoredNodes(child)) {</span> |
| <span class="source-line-no">2313</span><span id="line-2313"> getFavoredNodePromoter().generateFavoredNodesForMergedRegion(child, mergeParents);</span> |
| <span class="source-line-no">2314</span><span id="line-2314"> }</span> |
| <span class="source-line-no">2315</span><span id="line-2315"> }</span> |
| <span class="source-line-no">2316</span><span id="line-2316"></span> |
| <span class="source-line-no">2317</span><span id="line-2317"> /*</span> |
| <span class="source-line-no">2318</span><span id="line-2318"> * Favored nodes should be applied only when FavoredNodes balancer is configured and the region</span> |
| <span class="source-line-no">2319</span><span id="line-2319"> * belongs to a non-system table.</span> |
| <span class="source-line-no">2320</span><span id="line-2320"> */</span> |
| <span class="source-line-no">2321</span><span id="line-2321"> private boolean shouldAssignFavoredNodes(RegionInfo region) {</span> |
| <span class="source-line-no">2322</span><span id="line-2322"> return this.shouldAssignRegionsWithFavoredNodes</span> |
| <span class="source-line-no">2323</span><span id="line-2323"> && FavoredNodesManager.isFavoredNodeApplicable(region);</span> |
| <span class="source-line-no">2324</span><span id="line-2324"> }</span> |
| <span class="source-line-no">2325</span><span id="line-2325"></span> |
| <span class="source-line-no">2326</span><span id="line-2326"> // ============================================================================================</span> |
| <span class="source-line-no">2327</span><span id="line-2327"> // Assign Queue (Assign/Balance)</span> |
| <span class="source-line-no">2328</span><span id="line-2328"> // ============================================================================================</span> |
| <span class="source-line-no">2329</span><span id="line-2329">  private final ArrayList<RegionStateNode> pendingAssignQueue = new ArrayList<>();</span> |
| <span class="source-line-no">2330</span><span id="line-2330">  private final ReentrantLock assignQueueLock = new ReentrantLock();</span> |
| <span class="source-line-no">2331</span><span id="line-2331">  private final Condition assignQueueFullCond = assignQueueLock.newCondition();</span> |
| <span class="source-line-no">2332</span><span id="line-2332"></span> |
| <span class="source-line-no">2333</span><span id="line-2333"> /**</span> |
| <span class="source-line-no">2334</span><span id="line-2334"> * Add the assign operation to the assignment queue. The pending assignment operation will be</span> |
| <span class="source-line-no">2335</span><span id="line-2335"> * processed, and each region will be assigned by a server using the balancer.</span> |
| <span class="source-line-no">2336</span><span id="line-2336"> */</span> |
| <span class="source-line-no">2337</span><span id="line-2337"> protected void queueAssign(final RegionStateNode regionNode) {</span> |
| <span class="source-line-no">2338</span><span id="line-2338"> regionNode.getProcedureEvent().suspend();</span> |
| <span class="source-line-no">2339</span><span id="line-2339"></span> |
| <span class="source-line-no">2340</span><span id="line-2340"> // TODO: quick-start for meta and the other sys-tables?</span> |
| <span class="source-line-no">2341</span><span id="line-2341"> assignQueueLock.lock();</span> |
| <span class="source-line-no">2342</span><span id="line-2342"> try {</span> |
| <span class="source-line-no">2343</span><span id="line-2343"> pendingAssignQueue.add(regionNode);</span> |
| <span class="source-line-no">2344</span><span id="line-2344"> if (</span> |
| <span class="source-line-no">2345</span><span id="line-2345"> regionNode.isSystemTable() || pendingAssignQueue.size() == 1</span> |
| <span class="source-line-no">2346</span><span id="line-2346"> || pendingAssignQueue.size() >= assignDispatchWaitQueueMaxSize</span> |
| <span class="source-line-no">2347</span><span id="line-2347"> ) {</span> |
| <span class="source-line-no">2348</span><span id="line-2348"> assignQueueFullCond.signal();</span> |
| <span class="source-line-no">2349</span><span id="line-2349"> }</span> |
| <span class="source-line-no">2350</span><span id="line-2350"> } finally {</span> |
| <span class="source-line-no">2351</span><span id="line-2351"> assignQueueLock.unlock();</span> |
| <span class="source-line-no">2352</span><span id="line-2352"> }</span> |
| <span class="source-line-no">2353</span><span id="line-2353"> }</span> |
| <span class="source-line-no">2354</span><span id="line-2354"></span> |
| <span class="source-line-no">2355</span><span id="line-2355">  private void startAssignmentThread() {</span> |
| <span class="source-line-no">2356</span><span id="line-2356">    // Daemon worker named after the master's short server name. It runs processAssignQueue()</span> |
| <span class="source-line-no">2357</span><span id="line-2357">    // until isRunning() turns false, then drops whatever is still queued.</span> |
| <span class="source-line-no">2358</span><span id="line-2358">    assignThread = new Thread(() -> {</span> |
| <span class="source-line-no">2359</span><span id="line-2359">      while (isRunning()) {</span> |
| <span class="source-line-no">2360</span><span id="line-2360">        processAssignQueue();</span> |
| <span class="source-line-no">2361</span><span id="line-2361">      }</span> |
| <span class="source-line-no">2362</span><span id="line-2362">      // NOTE(review): cleared without assignQueueLock held, unlike the other queue accesses.</span> |
| <span class="source-line-no">2363</span><span id="line-2363">      pendingAssignQueue.clear();</span> |
| <span class="source-line-no">2364</span><span id="line-2364">    }, master.getServerName().toShortString());</span> |
| <span class="source-line-no">2365</span><span id="line-2365">    assignThread.setDaemon(true);</span> |
| <span class="source-line-no">2366</span><span id="line-2366">    assignThread.start();</span> |
| <span class="source-line-no">2367</span><span id="line-2367">  }</span> |
| <span class="source-line-no">2368</span><span id="line-2368"></span> |
| <span class="source-line-no">2369</span><span id="line-2369">  private void stopAssignmentThread() {</span> |
| <span class="source-line-no">2370</span><span id="line-2370">    // Signal once up front, then signal again before every 250 ms join until the thread ends.</span> |
| <span class="source-line-no">2371</span><span id="line-2371">    assignQueueSignal();</span> |
| <span class="source-line-no">2372</span><span id="line-2372">    try {</span> |
| <span class="source-line-no">2373</span><span id="line-2373">      for (; assignThread.isAlive(); assignThread.join(250)) {</span> |
| <span class="source-line-no">2374</span><span id="line-2374">        assignQueueSignal();</span> |
| <span class="source-line-no">2375</span><span id="line-2375">      }</span> |
| <span class="source-line-no">2376</span><span id="line-2376">    } catch (InterruptedException interrupted) {</span> |
| <span class="source-line-no">2377</span><span id="line-2377">      LOG.warn("join interrupted", interrupted);</span> |
| <span class="source-line-no">2378</span><span id="line-2378">      Thread.currentThread().interrupt();</span> |
| <span class="source-line-no">2379</span><span id="line-2379">    }</span> |
| <span class="source-line-no">2380</span><span id="line-2380">  }</span> |
| <span class="source-line-no">2381</span><span id="line-2381"></span> |
| <span class="source-line-no">2382</span><span id="line-2382"> private void assignQueueSignal() {</span> |
| <span class="source-line-no">2383</span><span id="line-2383"> assignQueueLock.lock(); // Condition.signal() below requires this lock to be held</span> |
| <span class="source-line-no">2384</span><span id="line-2384"> try {</span> |
| <span class="source-line-no">2385</span><span id="line-2385"> assignQueueFullCond.signal(); // wakes a thread blocked in waitOnAssignQueue, if any</span> |
| <span class="source-line-no">2386</span><span id="line-2386"> } finally {</span> |
| <span class="source-line-no">2387</span><span id="line-2387"> assignQueueLock.unlock();</span> |
| <span class="source-line-no">2388</span><span id="line-2388"> }</span> |
| <span class="source-line-no">2389</span><span id="line-2389"> }</span> |
| <span class="source-line-no">2390</span><span id="line-2390"></span> |
| <span class="source-line-no">2391</span><span id="line-2391"> @edu.umd.cs.findbugs.annotations.SuppressWarnings("WA_AWAIT_NOT_IN_LOOP")</span> |
| <span class="source-line-no">2392</span><span id="line-2392"> private HashMap<RegionInfo, RegionStateNode> waitOnAssignQueue() {</span> |
| <span class="source-line-no">2393</span><span id="line-2393"> HashMap<RegionInfo, RegionStateNode> regions = null;</span> |
| <span class="source-line-no">2394</span><span id="line-2394"></span> |
| <span class="source-line-no">2395</span><span id="line-2395"> assignQueueLock.lock();</span> |
| <span class="source-line-no">2396</span><span id="line-2396"> try {</span> |
| <span class="source-line-no">2397</span><span id="line-2397"> if (pendingAssignQueue.isEmpty() && isRunning()) {</span> |
| <span class="source-line-no">2398</span><span id="line-2398"> assignQueueFullCond.await();</span> |
| <span class="source-line-no">2399</span><span id="line-2399"> }</span> |
| <span class="source-line-no">2400</span><span id="line-2400"></span> |
| <span class="source-line-no">2401</span><span id="line-2401"> if (!isRunning()) {</span> |
| <span class="source-line-no">2402</span><span id="line-2402"> return null;</span> |
| <span class="source-line-no">2403</span><span id="line-2403"> }</span> |
| <span class="source-line-no">2404</span><span id="line-2404"> assignQueueFullCond.await(assignDispatchWaitMillis, TimeUnit.MILLISECONDS);</span> |
| <span class="source-line-no">2405</span><span id="line-2405"> regions = new HashMap<RegionInfo, RegionStateNode>(pendingAssignQueue.size());</span> |
| <span class="source-line-no">2406</span><span id="line-2406"> for (RegionStateNode regionNode : pendingAssignQueue) {</span> |
| <span class="source-line-no">2407</span><span id="line-2407"> regions.put(regionNode.getRegionInfo(), regionNode);</span> |
| <span class="source-line-no">2408</span><span id="line-2408"> }</span> |
| <span class="source-line-no">2409</span><span id="line-2409"> pendingAssignQueue.clear();</span> |
| <span class="source-line-no">2410</span><span id="line-2410"> } catch (InterruptedException e) {</span> |
| <span class="source-line-no">2411</span><span id="line-2411"> LOG.warn("got interrupted ", e);</span> |
| <span class="source-line-no">2412</span><span id="line-2412"> Thread.currentThread().interrupt();</span> |
| <span class="source-line-no">2413</span><span id="line-2413"> } finally {</span> |
| <span class="source-line-no">2414</span><span id="line-2414"> assignQueueLock.unlock();</span> |
| <span class="source-line-no">2415</span><span id="line-2415"> }</span> |
| <span class="source-line-no">2416</span><span id="line-2416"> return regions;</span> |
| <span class="source-line-no">2417</span><span id="line-2417"> }</span> |
| <span class="source-line-no">2418</span><span id="line-2418"></span> |
| <span class="source-line-no">2419</span><span id="line-2419"> private void processAssignQueue() {</span> |
| <span class="source-line-no">2420</span><span id="line-2420"> final HashMap<RegionInfo, RegionStateNode> regions = waitOnAssignQueue();</span> |
| <span class="source-line-no">2421</span><span id="line-2421"> if (regions == null || regions.size() == 0 || !isRunning()) {</span> |
| <span class="source-line-no">2422</span><span id="line-2422"> return;</span> |
| <span class="source-line-no">2423</span><span id="line-2423"> }</span> |
| <span class="source-line-no">2424</span><span id="line-2424"></span> |
| <span class="source-line-no">2425</span><span id="line-2425"> if (LOG.isTraceEnabled()) {</span> |
| <span class="source-line-no">2426</span><span id="line-2426"> LOG.trace("PROCESS ASSIGN QUEUE regionCount=" + regions.size());</span> |
| <span class="source-line-no">2427</span><span id="line-2427"> }</span> |
| <span class="source-line-no">2428</span><span id="line-2428"></span> |
| <span class="source-line-no">2429</span><span id="line-2429"> // TODO: Optimize balancer. pass a RegionPlan?</span> |
| <span class="source-line-no">2430</span><span id="line-2430"> final HashMap<RegionInfo, ServerName> retainMap = new HashMap<>();</span> |
| <span class="source-line-no">2431</span><span id="line-2431"> final List<RegionInfo> userHRIs = new ArrayList<>(regions.size());</span> |
| <span class="source-line-no">2432</span><span id="line-2432"> // Regions for system tables requiring reassignment</span> |
| <span class="source-line-no">2433</span><span id="line-2433"> final List<RegionInfo> systemHRIs = new ArrayList<>();</span> |
| <span class="source-line-no">2434</span><span id="line-2434"> for (RegionStateNode regionStateNode : regions.values()) {</span> |
| <span class="source-line-no">2435</span><span id="line-2435"> boolean sysTable = regionStateNode.isSystemTable();</span> |
| <span class="source-line-no">2436</span><span id="line-2436"> final List<RegionInfo> hris = sysTable ? systemHRIs : userHRIs;</span> |
| <span class="source-line-no">2437</span><span id="line-2437"> if (regionStateNode.getRegionLocation() != null) {</span> |
| <span class="source-line-no">2438</span><span id="line-2438"> retainMap.put(regionStateNode.getRegionInfo(), regionStateNode.getRegionLocation());</span> |
| <span class="source-line-no">2439</span><span id="line-2439"> } else {</span> |
| <span class="source-line-no">2440</span><span id="line-2440"> hris.add(regionStateNode.getRegionInfo());</span> |
| <span class="source-line-no">2441</span><span id="line-2441"> }</span> |
| <span class="source-line-no">2442</span><span id="line-2442"> }</span> |
| <span class="source-line-no">2443</span><span id="line-2443"></span> |
| <span class="source-line-no">2444</span><span id="line-2444"> // TODO: connect with the listener to invalidate the cache</span> |
| <span class="source-line-no">2445</span><span id="line-2445"></span> |
| <span class="source-line-no">2446</span><span id="line-2446"> // TODO use events</span> |
| <span class="source-line-no">2447</span><span id="line-2447"> List<ServerName> servers = master.getServerManager().createDestinationServersList();</span> |
| <span class="source-line-no">2448</span><span id="line-2448"> for (int i = 0; servers.size() < 1; ++i) {</span> |
| <span class="source-line-no">2449</span><span id="line-2449"> // Report every fourth time around this loop; try not to flood log.</span> |
| <span class="source-line-no">2450</span><span id="line-2450"> if (i % 4 == 0) {</span> |
| <span class="source-line-no">2451</span><span id="line-2451"> LOG.warn("No servers available; cannot place " + regions.size() + " unassigned regions.");</span> |
| <span class="source-line-no">2452</span><span id="line-2452"> }</span> |
| <span class="source-line-no">2453</span><span id="line-2453"></span> |
| <span class="source-line-no">2454</span><span id="line-2454"> if (!isRunning()) {</span> |
| <span class="source-line-no">2455</span><span id="line-2455"> LOG.debug("Stopped! Dropping assign of " + regions.size() + " queued regions.");</span> |
| <span class="source-line-no">2456</span><span id="line-2456"> return;</span> |
| <span class="source-line-no">2457</span><span id="line-2457"> }</span> |
| <span class="source-line-no">2458</span><span id="line-2458"> Threads.sleep(250);</span> |
| <span class="source-line-no">2459</span><span id="line-2459"> servers = master.getServerManager().createDestinationServersList();</span> |
| <span class="source-line-no">2460</span><span id="line-2460"> }</span> |
| <span class="source-line-no">2461</span><span id="line-2461"></span> |
| <span class="source-line-no">2462</span><span id="line-2462"> if (!systemHRIs.isEmpty()) {</span> |
| <span class="source-line-no">2463</span><span id="line-2463"> // System table regions requiring reassignment are present, get region servers</span> |
| <span class="source-line-no">2464</span><span id="line-2464"> // not available for system table regions</span> |
| <span class="source-line-no">2465</span><span id="line-2465"> final List<ServerName> excludeServers = getExcludedServersForSystemTable();</span> |
| <span class="source-line-no">2466</span><span id="line-2466"> List<ServerName> serversForSysTables =</span> |
| <span class="source-line-no">2467</span><span id="line-2467"> servers.stream().filter(s -> !excludeServers.contains(s)).collect(Collectors.toList());</span> |
| <span class="source-line-no">2468</span><span id="line-2468"> if (serversForSysTables.isEmpty()) {</span> |
| <span class="source-line-no">2469</span><span id="line-2469"> LOG.warn("Filtering old server versions and the excluded produced an empty set; "</span> |
| <span class="source-line-no">2470</span><span id="line-2470"> + "instead considering all candidate servers!");</span> |
| <span class="source-line-no">2471</span><span id="line-2471"> }</span> |
| <span class="source-line-no">2472</span><span id="line-2472"> LOG.debug("Processing assignQueue; systemServersCount=" + serversForSysTables.size()</span> |
| <span class="source-line-no">2473</span><span id="line-2473"> + ", allServersCount=" + servers.size());</span> |
| <span class="source-line-no">2474</span><span id="line-2474"> processAssignmentPlans(regions, null, systemHRIs,</span> |
| <span class="source-line-no">2475</span><span id="line-2475"> serversForSysTables.isEmpty() && !containsBogusAssignments(regions, systemHRIs)</span> |
| <span class="source-line-no">2476</span><span id="line-2476"> ? servers</span> |
| <span class="source-line-no">2477</span><span id="line-2477"> : serversForSysTables);</span> |
| <span class="source-line-no">2478</span><span id="line-2478"> }</span> |
| <span class="source-line-no">2479</span><span id="line-2479"></span> |
| <span class="source-line-no">2480</span><span id="line-2480"> processAssignmentPlans(regions, retainMap, userHRIs, servers);</span> |
| <span class="source-line-no">2481</span><span id="line-2481"> }</span> |
| <span class="source-line-no">2482</span><span id="line-2482"></span> |
| <span class="source-line-no">2483</span><span id="line-2483"> private boolean containsBogusAssignments(Map<RegionInfo, RegionStateNode> regions,</span> |
| <span class="source-line-no">2484</span><span id="line-2484"> List<RegionInfo> hirs) {</span> |
| <span class="source-line-no">2485</span><span id="line-2485"> for (RegionInfo ri : hirs) {</span> |
| <span class="source-line-no">2486</span><span id="line-2486"> if (</span> |
| <span class="source-line-no">2487</span><span id="line-2487"> regions.get(ri).getRegionLocation() != null</span> |
| <span class="source-line-no">2488</span><span id="line-2488"> && regions.get(ri).getRegionLocation().equals(LoadBalancer.BOGUS_SERVER_NAME)</span> |
| <span class="source-line-no">2489</span><span id="line-2489"> ) {</span> |
| <span class="source-line-no">2490</span><span id="line-2490"> return true;</span> |
| <span class="source-line-no">2491</span><span id="line-2491"> }</span> |
| <span class="source-line-no">2492</span><span id="line-2492"> }</span> |
| <span class="source-line-no">2493</span><span id="line-2493"> return false;</span> |
| <span class="source-line-no">2494</span><span id="line-2494"> }</span> |
| <span class="source-line-no">2495</span><span id="line-2495"></span> |
| <span class="source-line-no">2496</span><span id="line-2496">  private void processAssignmentPlans(final HashMap<RegionInfo, RegionStateNode> regions,</span> |
| <span class="source-line-no">2497</span><span id="line-2497">    final HashMap<RegionInfo, ServerName> retainMap, final List<RegionInfo> hris,</span> |
| <span class="source-line-no">2498</span><span id="line-2498">    final List<ServerName> servers) {</span> |
| <span class="source-line-no">2499</span><span id="line-2499">    // Retained placements first, then round-robin; regions of a failed plan are re-queued.</span> |
| <span class="source-line-no">2500</span><span id="line-2500">    final boolean tracing = LOG.isTraceEnabled();</span> |
| <span class="source-line-no">2501</span><span id="line-2501">    if (tracing) {</span> |
| <span class="source-line-no">2502</span><span id="line-2502">      LOG.trace("Available servers count=" + servers.size() + ": " + servers);</span> |
| <span class="source-line-no">2503</span><span id="line-2503">    }</span> |
| <span class="source-line-no">2504</span><span id="line-2504">    final LoadBalancer balancer = getBalancer();</span> |
| <span class="source-line-no">2505</span><span id="line-2505">    // ask the balancer where to place regions</span> |
| <span class="source-line-no">2506</span><span id="line-2506">    if (retainMap != null && !retainMap.isEmpty()) {</span> |
| <span class="source-line-no">2507</span><span id="line-2507">      if (tracing) {</span> |
| <span class="source-line-no">2508</span><span id="line-2508">        LOG.trace("retain assign regions=" + retainMap);</span> |
| <span class="source-line-no">2509</span><span id="line-2509">      }</span> |
| <span class="source-line-no">2510</span><span id="line-2510">      try {</span> |
| <span class="source-line-no">2511</span><span id="line-2511">        acceptPlan(regions, balancer.retainAssignment(retainMap, servers));</span> |
| <span class="source-line-no">2512</span><span id="line-2512">      } catch (IOException ioe) {</span> |
| <span class="source-line-no">2513</span><span id="line-2513">        LOG.warn("unable to retain assignment", ioe);</span> |
| <span class="source-line-no">2514</span><span id="line-2514">        addToPendingAssignment(regions, retainMap.keySet());</span> |
| <span class="source-line-no">2515</span><span id="line-2515">      }</span> |
| <span class="source-line-no">2516</span><span id="line-2516">    }</span> |
| <span class="source-line-no">2517</span><span id="line-2517"></span> |
| <span class="source-line-no">2518</span><span id="line-2518">    // TODO: Do we need to split retain and round-robin?</span> |
| <span class="source-line-no">2519</span><span id="line-2519">    // the retain seems to fallback to round-robin/random if the region is not in the map.</span> |
| <span class="source-line-no">2520</span><span id="line-2520">    if (!hris.isEmpty()) {</span> |
| <span class="source-line-no">2521</span><span id="line-2521">      hris.sort(RegionInfo.COMPARATOR);</span> |
| <span class="source-line-no">2522</span><span id="line-2522">      if (tracing) {</span> |
| <span class="source-line-no">2523</span><span id="line-2523">        LOG.trace("round robin regions=" + hris);</span> |
| <span class="source-line-no">2524</span><span id="line-2524">      }</span> |
| <span class="source-line-no">2525</span><span id="line-2525">      try {</span> |
| <span class="source-line-no">2526</span><span id="line-2526">        acceptPlan(regions, balancer.roundRobinAssignment(hris, servers));</span> |
| <span class="source-line-no">2527</span><span id="line-2527">      } catch (IOException ioe) {</span> |
| <span class="source-line-no">2528</span><span id="line-2528">        LOG.warn("unable to round-robin assignment", ioe);</span> |
| <span class="source-line-no">2529</span><span id="line-2529">        addToPendingAssignment(regions, hris);</span> |
| <span class="source-line-no">2530</span><span id="line-2530">      }</span> |
| <span class="source-line-no">2531</span><span id="line-2531">    }</span> |
| <span class="source-line-no">2532</span><span id="line-2532">  }</span> |
| <span class="source-line-no">2533</span><span id="line-2533"></span> |
| <span class="source-line-no">2534</span><span id="line-2534">  private void acceptPlan(final HashMap<RegionInfo, RegionStateNode> regions,</span> |
| <span class="source-line-no">2535</span><span id="line-2535">    final Map<ServerName, List<RegionInfo>> plan) throws HBaseIOException {</span> |
| <span class="source-line-no">2536</span><span id="line-2536">    // Records each planned destination on its region node and wakes the region's procedure event;</span> |
| <span class="source-line-no">2537</span><span id="line-2537">    // a system-table region planned onto BOGUS_SERVER_NAME is put back on the pending queue.</span> |
| <span class="source-line-no">2538</span><span id="line-2538">    final ProcedureEvent<?>[] events = new ProcedureEvent[regions.size()];</span> |
| <span class="source-line-no">2539</span><span id="line-2539">    final long st = EnvironmentEdgeManager.currentTime();</span> |
| <span class="source-line-no">2540</span><span id="line-2540">    if (plan.isEmpty()) {</span> |
| <span class="source-line-no">2541</span><span id="line-2541">      throw new HBaseIOException("unable to compute plans for regions=" + regions.size());</span> |
| <span class="source-line-no">2542</span><span id="line-2542">    }</span> |
| <span class="source-line-no">2543</span><span id="line-2543">    int evcount = 0;</span> |
| <span class="source-line-no">2544</span><span id="line-2544">    for (Map.Entry<ServerName, List<RegionInfo>> entry : plan.entrySet()) {</span> |
| <span class="source-line-no">2545</span><span id="line-2545">      final ServerName server = entry.getKey();</span> |
| <span class="source-line-no">2546</span><span id="line-2546">      for (RegionInfo hri : entry.getValue()) {</span> |
| <span class="source-line-no">2547</span><span id="line-2547">        final RegionStateNode regionNode = regions.get(hri);</span> |
| <span class="source-line-no">2548</span><span id="line-2548">        regionNode.setRegionLocation(server);</span> |
| <span class="source-line-no">2549</span><span id="line-2549">        if (server.equals(LoadBalancer.BOGUS_SERVER_NAME) && regionNode.isSystemTable()) {</span> |
| <span class="source-line-no">2550</span><span id="line-2550">          assignQueueLock.lock();</span> |
| <span class="source-line-no">2551</span><span id="line-2551">          try {</span> |
| <span class="source-line-no">2552</span><span id="line-2552">            pendingAssignQueue.add(regionNode);</span> |
| <span class="source-line-no">2553</span><span id="line-2553">          } finally {</span> |
| <span class="source-line-no">2554</span><span id="line-2554">            assignQueueLock.unlock();</span> |
| <span class="source-line-no">2555</span><span id="line-2555">          }</span> |
| <span class="source-line-no">2556</span><span id="line-2556">        } else {</span> |
| <span class="source-line-no">2557</span><span id="line-2557">          events[evcount++] = regionNode.getProcedureEvent();</span> |
| <span class="source-line-no">2558</span><span id="line-2558">        }</span> |
| <span class="source-line-no">2559</span><span id="line-2559">      }</span> |
| <span class="source-line-no">2560</span><span id="line-2560">    }</span> |
| <span class="source-line-no">2561</span><span id="line-2561">    ProcedureEvent.wakeEvents(getProcedureScheduler(), events);</span> |
| <span class="source-line-no">2562</span><span id="line-2562">    if (LOG.isTraceEnabled()) {</span> |
| <span class="source-line-no">2563</span><span id="line-2563">      final long et = EnvironmentEdgeManager.currentTime();</span> |
| <span class="source-line-no">2564</span><span id="line-2564">      // Report the number of events collected above, not the capacity of the array.</span> |
| <span class="source-line-no">2565</span><span id="line-2565">      LOG.trace("ASSIGN ACCEPT " + evcount + " -> " + StringUtils.humanTimeDiff(et - st));</span> |
| <span class="source-line-no">2566</span><span id="line-2566">    }</span> |
| <span class="source-line-no">2567</span><span id="line-2567">  }</span> |
| <span class="source-line-no">2568</span><span id="line-2568"></span> |
| <span class="source-line-no">2569</span><span id="line-2569">  private void addToPendingAssignment(final HashMap<RegionInfo, RegionStateNode> regions,</span> |
| <span class="source-line-no">2570</span><span id="line-2570">    final Collection<RegionInfo> pendingRegions) {</span> |
| <span class="source-line-no">2571</span><span id="line-2571">    // Puts the nodes of the given regions back on pendingAssignQueue, in iteration order, while</span> |
| <span class="source-line-no">2572</span><span id="line-2572">    // holding assignQueueLock. No signal is sent from here.</span> |
| <span class="source-line-no">2573</span><span id="line-2573">    assignQueueLock.lock();</span> |
| <span class="source-line-no">2574</span><span id="line-2574">    try {</span> |
| <span class="source-line-no">2575</span><span id="line-2575">      pendingRegions.stream().map(regions::get).forEachOrdered(pendingAssignQueue::add);</span> |
| <span class="source-line-no">2576</span><span id="line-2576">    } finally {</span> |
| <span class="source-line-no">2577</span><span id="line-2577">      assignQueueLock.unlock();</span> |
| <span class="source-line-no">2578</span><span id="line-2578">    }</span> |
| <span class="source-line-no">2579</span><span id="line-2579">  }</span> |
| <span class="source-line-no">2580</span><span id="line-2580"></span> |
| <span class="source-line-no">2581</span><span id="line-2581"> /**</span> |
| <span class="source-line-no">2582</span><span id="line-2582"> * For a given cluster with mixed versions of servers, get a list of servers with lower versions,</span> |
| <span class="source-line-no">2583</span><span id="line-2583"> * where system table regions should not be assigned to. For system table, we must assign regions</span> |
| <span class="source-line-no">2584</span><span id="line-2584"> * to a server with highest version. However, we can disable this exclusion using config:</span> |
| <span class="source-line-no">2585</span><span id="line-2585"> * "hbase.min.version.move.system.tables" if checkForMinVersion is true. Detailed explanation</span> |
| <span class="source-line-no">2586</span><span id="line-2586"> * available with definition of minVersionToMoveSysTables.</span> |
| <span class="source-line-no">2587</span><span id="line-2587"> * @return List of Excluded servers for System table regions.</span> |
| <span class="source-line-no">2588</span><span id="line-2588"> */</span> |
| <span class="source-line-no">2589</span><span id="line-2589"> public List<ServerName> getExcludedServersForSystemTable() {</span> |
| <span class="source-line-no">2590</span><span id="line-2590"> // TODO: This should be a cached list kept by the ServerManager rather than calculated on each</span> |
| <span class="source-line-no">2591</span><span id="line-2591"> // move or system region assign. The RegionServerTracker keeps list of online Servers with</span> |
| <span class="source-line-no">2592</span><span id="line-2592"> // RegionServerInfo that includes Version.</span> |
| <span class="source-line-no">2593</span><span id="line-2593"> List<Pair<ServerName, String>> serverList =</span> |
| <span class="source-line-no">2594</span><span id="line-2594"> master.getServerManager().getOnlineServersList().stream()</span> |
| <span class="source-line-no">2595</span><span id="line-2595"> .map(s -> new Pair<>(s, master.getRegionServerVersion(s))).collect(Collectors.toList());</span> |
| <span class="source-line-no">2596</span><span id="line-2596"> if (serverList.isEmpty()) {</span> |
| <span class="source-line-no">2597</span><span id="line-2597"> return new ArrayList<>();</span> |
| <span class="source-line-no">2598</span><span id="line-2598"> }</span> |
| <span class="source-line-no">2599</span><span id="line-2599"> String highestVersion = Collections</span> |
| <span class="source-line-no">2600</span><span id="line-2600"> .max(serverList, (o1, o2) -> VersionInfo.compareVersion(o1.getSecond(), o2.getSecond()))</span> |
| <span class="source-line-no">2601</span><span id="line-2601"> .getSecond();</span> |
| <span class="source-line-no">2602</span><span id="line-2602"> if (!DEFAULT_MIN_VERSION_MOVE_SYS_TABLES_CONFIG.equals(minVersionToMoveSysTables)) {</span> |
| <span class="source-line-no">2603</span><span id="line-2603"> int comparedValue = VersionInfo.compareVersion(minVersionToMoveSysTables, highestVersion);</span> |
| <span class="source-line-no">2604</span><span id="line-2604"> if (comparedValue > 0) {</span> |
| <span class="source-line-no">2605</span><span id="line-2605"> return new ArrayList<>();</span> |
| <span class="source-line-no">2606</span><span id="line-2606"> }</span> |
| <span class="source-line-no">2607</span><span id="line-2607"> }</span> |
| <span class="source-line-no">2608</span><span id="line-2608"> return serverList.stream().filter(pair -> !pair.getSecond().equals(highestVersion))</span> |
| <span class="source-line-no">2609</span><span id="line-2609"> .map(Pair::getFirst).collect(Collectors.toList());</span> |
| <span class="source-line-no">2610</span><span id="line-2610"> }</span> |
| <span class="source-line-no">2611</span><span id="line-2611"></span> |
| <span class="source-line-no">2612</span><span id="line-2612"> MasterServices getMaster() {</span> |
| <span class="source-line-no">2613</span><span id="line-2613"> return this.master; // package-private accessor: hands back the master reference as-is</span> |
| <span class="source-line-no">2614</span><span id="line-2614"> }</span> |
| <span class="source-line-no">2615</span><span id="line-2615"></span> |
| <span class="source-line-no">2616</span><span id="line-2616"> /** Returns a snapshot of rsReports */</span> |
| <span class="source-line-no">2617</span><span id="line-2617"> public Map<ServerName, Set<byte[]>> getRSReports() {</span> |
| <span class="source-line-no">2618</span><span id="line-2618"> Map<ServerName, Set<byte[]>> rsReportsSnapshot = new HashMap<>();</span> |
| <span class="source-line-no">2619</span><span id="line-2619"> synchronized (rsReports) {</span> |
| <span class="source-line-no">2620</span><span id="line-2620"> rsReports.entrySet().forEach(e -> rsReportsSnapshot.put(e.getKey(), e.getValue()));</span> |
| <span class="source-line-no">2621</span><span id="line-2621"> }</span> |
| <span class="source-line-no">2622</span><span id="line-2622"> return rsReportsSnapshot;</span> |
| <span class="source-line-no">2623</span><span id="line-2623"> }</span> |
| <span class="source-line-no">2624</span><span id="line-2624"></span> |
| <span class="source-line-no">2625</span><span id="line-2625"> /**</span> |
| <span class="source-line-no">2626</span><span id="line-2626"> * Count the regions of the given table by state, e.g. how many regions of the table are</span> |
| <span class="source-line-no">2627</span><span id="line-2627"> * opened, closed, split or in transition. Every count is zero when the table is disabled.</span> |
| <span class="source-line-no">2628</span><span id="line-2628"> * @param tableName TableName</span> |
| <span class="source-line-no">2629</span><span id="line-2629"> * @return region states count</span> |
| <span class="source-line-no">2630</span><span id="line-2630"> */</span> |
| <span class="source-line-no">2631</span><span id="line-2631"> public RegionStatesCount getRegionStatesCount(TableName tableName) {</span> |
| <span class="source-line-no">2632</span><span id="line-2632"> int opened = 0, closed = 0, split = 0;</span> |
| <span class="source-line-no">2633</span><span id="line-2633"> int total = 0;</span> |
| <span class="source-line-no">2634</span><span id="line-2634"> int inTransition = 0;</span> |
| <span class="source-line-no">2635</span><span id="line-2635"> // Counters stay at zero when the table is disabled.</span> |
| <span class="source-line-no">2636</span><span id="line-2636"> if (!isTableDisabled(tableName)) {</span> |
| <span class="source-line-no">2637</span><span id="line-2637"> final List<RegionState> tableStates = regionStates.getTableRegionStates(tableName);</span> |
| <span class="source-line-no">2638</span><span id="line-2638"> for (RegionState current : tableStates) {</span> |
| <span class="source-line-no">2639</span><span id="line-2639"> if (current.isOpened()) {</span> |
| <span class="source-line-no">2640</span><span id="line-2640"> opened += 1;</span> |
| <span class="source-line-no">2641</span><span id="line-2641"> } else if (current.isClosed()) {</span> |
| <span class="source-line-no">2642</span><span id="line-2642"> closed += 1;</span> |
| <span class="source-line-no">2643</span><span id="line-2643"> } else if (current.isSplit()) {</span> |
| <span class="source-line-no">2644</span><span id="line-2644"> split += 1;</span> |
| <span class="source-line-no">2645</span><span id="line-2645"> }</span> |
| <span class="source-line-no">2646</span><span id="line-2646"> }</span> |
| <span class="source-line-no">2647</span><span id="line-2647"> total = tableStates.size();</span> |
| <span class="source-line-no">2648</span><span id="line-2648"> // Whatever is neither opened nor split counts as in transition; closed is not subtracted.</span> |
| <span class="source-line-no">2649</span><span id="line-2649"> inTransition = total - opened - split;</span> |
| <span class="source-line-no">2650</span><span id="line-2650"> }</span> |
| <span class="source-line-no">2651</span><span id="line-2651"> return new RegionStatesCount.RegionStatesCountBuilder().setOpenRegions(opened)</span> |
| <span class="source-line-no">2652</span><span id="line-2652"> .setClosedRegions(closed).setSplitRegions(split).setRegionsInTransition(inTransition)</span> |
| <span class="source-line-no">2653</span><span id="line-2653"> .setTotalRegions(total).build();</span> |
| <span class="source-line-no">2654</span><span id="line-2654"> }</span> |
| <span class="source-line-no">2655</span><span id="line-2655"></span> |
| <span class="source-line-no">2656</span><span id="line-2656">}</span> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| </pre> |
| </div> |
| </main> |
| </body> |
| </html> |