| <!DOCTYPE HTML> |
| <html lang="en"> |
| <head> |
| <!-- Declare the encoding first so the parser never has to guess it (the page content is plain ASCII, so UTF-8 is safe). --> |
| <meta charset="utf-8"> |
| <!-- Generated by javadoc (17) --> |
| <title>Source code</title> |
| <meta name="viewport" content="width=device-width, initial-scale=1"> |
| <meta name="description" content="source: package: org.apache.hadoop.hbase.master, class: ServerManager"> |
| <meta name="generator" content="javadoc/SourceToHTMLConverter"> |
| <link rel="stylesheet" type="text/css" href="../../../../../../stylesheet.css" title="Style"> |
| </head> |
| <body class="source-page"> |
| <main role="main"> |
| <div class="source-container"> |
| <pre><span class="source-line-no">001</span><span id="line-1">/*</span> |
| <span class="source-line-no">002</span><span id="line-2"> * Licensed to the Apache Software Foundation (ASF) under one</span> |
| <span class="source-line-no">003</span><span id="line-3"> * or more contributor license agreements. See the NOTICE file</span> |
| <span class="source-line-no">004</span><span id="line-4"> * distributed with this work for additional information</span> |
| <span class="source-line-no">005</span><span id="line-5"> * regarding copyright ownership. The ASF licenses this file</span> |
| <span class="source-line-no">006</span><span id="line-6"> * to you under the Apache License, Version 2.0 (the</span> |
| <span class="source-line-no">007</span><span id="line-7"> * "License"); you may not use this file except in compliance</span> |
| <span class="source-line-no">008</span><span id="line-8"> * with the License. You may obtain a copy of the License at</span> |
| <span class="source-line-no">009</span><span id="line-9"> *</span> |
| <span class="source-line-no">010</span><span id="line-10"> * http://www.apache.org/licenses/LICENSE-2.0</span> |
| <span class="source-line-no">011</span><span id="line-11"> *</span> |
| <span class="source-line-no">012</span><span id="line-12"> * Unless required by applicable law or agreed to in writing, software</span> |
| <span class="source-line-no">013</span><span id="line-13"> * distributed under the License is distributed on an "AS IS" BASIS,</span> |
| <span class="source-line-no">014</span><span id="line-14"> * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span> |
| <span class="source-line-no">015</span><span id="line-15"> * See the License for the specific language governing permissions and</span> |
| <span class="source-line-no">016</span><span id="line-16"> * limitations under the License.</span> |
| <span class="source-line-no">017</span><span id="line-17"> */</span> |
| <span class="source-line-no">018</span><span id="line-18">package org.apache.hadoop.hbase.master;</span> |
| <span class="source-line-no">019</span><span id="line-19"></span> |
| <span class="source-line-no">020</span><span id="line-20">import static org.apache.hadoop.hbase.util.ConcurrentMapUtils.computeIfAbsent;</span> |
| <span class="source-line-no">021</span><span id="line-21"></span> |
| <span class="source-line-no">022</span><span id="line-22">import java.io.IOException;</span> |
| <span class="source-line-no">023</span><span id="line-23">import java.net.InetAddress;</span> |
| <span class="source-line-no">024</span><span id="line-24">import java.util.ArrayList;</span> |
| <span class="source-line-no">025</span><span id="line-25">import java.util.Collections;</span> |
| <span class="source-line-no">026</span><span id="line-26">import java.util.HashSet;</span> |
| <span class="source-line-no">027</span><span id="line-27">import java.util.Iterator;</span> |
| <span class="source-line-no">028</span><span id="line-28">import java.util.List;</span> |
| <span class="source-line-no">029</span><span id="line-29">import java.util.Map;</span> |
| <span class="source-line-no">030</span><span id="line-30">import java.util.Map.Entry;</span> |
| <span class="source-line-no">031</span><span id="line-31">import java.util.Objects;</span> |
| <span class="source-line-no">032</span><span id="line-32">import java.util.Set;</span> |
| <span class="source-line-no">033</span><span id="line-33">import java.util.concurrent.ConcurrentNavigableMap;</span> |
| <span class="source-line-no">034</span><span id="line-34">import java.util.concurrent.ConcurrentSkipListMap;</span> |
| <span class="source-line-no">035</span><span id="line-35">import java.util.concurrent.CopyOnWriteArrayList;</span> |
| <span class="source-line-no">036</span><span id="line-36">import java.util.concurrent.atomic.AtomicBoolean;</span> |
| <span class="source-line-no">037</span><span id="line-37">import java.util.function.Predicate;</span> |
| <span class="source-line-no">038</span><span id="line-38">import org.apache.hadoop.conf.Configuration;</span> |
| <span class="source-line-no">039</span><span id="line-39">import org.apache.hadoop.fs.FSDataInputStream;</span> |
| <span class="source-line-no">040</span><span id="line-40">import org.apache.hadoop.fs.FSDataOutputStream;</span> |
| <span class="source-line-no">041</span><span id="line-41">import org.apache.hadoop.fs.FileSystem;</span> |
| <span class="source-line-no">042</span><span id="line-42">import org.apache.hadoop.fs.Path;</span> |
| <span class="source-line-no">043</span><span id="line-43">import org.apache.hadoop.hbase.ClockOutOfSyncException;</span> |
| <span class="source-line-no">044</span><span id="line-44">import org.apache.hadoop.hbase.HConstants;</span> |
| <span class="source-line-no">045</span><span id="line-45">import org.apache.hadoop.hbase.NotServingRegionException;</span> |
| <span class="source-line-no">046</span><span id="line-46">import org.apache.hadoop.hbase.RegionMetrics;</span> |
| <span class="source-line-no">047</span><span id="line-47">import org.apache.hadoop.hbase.ScheduledChore;</span> |
| <span class="source-line-no">048</span><span id="line-48">import org.apache.hadoop.hbase.ServerMetrics;</span> |
| <span class="source-line-no">049</span><span id="line-49">import org.apache.hadoop.hbase.ServerMetricsBuilder;</span> |
| <span class="source-line-no">050</span><span id="line-50">import org.apache.hadoop.hbase.ServerName;</span> |
| <span class="source-line-no">051</span><span id="line-51">import org.apache.hadoop.hbase.YouAreDeadException;</span> |
| <span class="source-line-no">052</span><span id="line-52">import org.apache.hadoop.hbase.client.AsyncClusterConnection;</span> |
| <span class="source-line-no">053</span><span id="line-53">import org.apache.hadoop.hbase.client.AsyncRegionServerAdmin;</span> |
| <span class="source-line-no">054</span><span id="line-54">import org.apache.hadoop.hbase.client.RegionInfo;</span> |
| <span class="source-line-no">055</span><span id="line-55">import org.apache.hadoop.hbase.conf.ConfigurationObserver;</span> |
| <span class="source-line-no">056</span><span id="line-56">import org.apache.hadoop.hbase.ipc.DecommissionedHostRejectedException;</span> |
| <span class="source-line-no">057</span><span id="line-57">import org.apache.hadoop.hbase.ipc.RemoteWithExtrasException;</span> |
| <span class="source-line-no">058</span><span id="line-58">import org.apache.hadoop.hbase.master.assignment.RegionStates;</span> |
| <span class="source-line-no">059</span><span id="line-59">import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;</span> |
| <span class="source-line-no">060</span><span id="line-60">import org.apache.hadoop.hbase.monitoring.MonitoredTask;</span> |
| <span class="source-line-no">061</span><span id="line-61">import org.apache.hadoop.hbase.procedure2.Procedure;</span> |
| <span class="source-line-no">062</span><span id="line-62">import org.apache.hadoop.hbase.util.Bytes;</span> |
| <span class="source-line-no">063</span><span id="line-63">import org.apache.hadoop.hbase.util.CommonFSUtils;</span> |
| <span class="source-line-no">064</span><span id="line-64">import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;</span> |
| <span class="source-line-no">065</span><span id="line-65">import org.apache.hadoop.hbase.util.FutureUtils;</span> |
| <span class="source-line-no">066</span><span id="line-66">import org.apache.hadoop.hbase.zookeeper.ZKUtil;</span> |
| <span class="source-line-no">067</span><span id="line-67">import org.apache.hadoop.hbase.zookeeper.ZKWatcher;</span> |
| <span class="source-line-no">068</span><span id="line-68">import org.apache.hadoop.hbase.zookeeper.ZNodePaths;</span> |
| <span class="source-line-no">069</span><span id="line-69">import org.apache.yetus.audience.InterfaceAudience;</span> |
| <span class="source-line-no">070</span><span id="line-70">import org.apache.zookeeper.KeeperException;</span> |
| <span class="source-line-no">071</span><span id="line-71">import org.slf4j.Logger;</span> |
| <span class="source-line-no">072</span><span id="line-72">import org.slf4j.LoggerFactory;</span> |
| <span class="source-line-no">073</span><span id="line-73"></span> |
| <span class="source-line-no">074</span><span id="line-74">import org.apache.hbase.thirdparty.com.google.protobuf.ByteString;</span> |
| <span class="source-line-no">075</span><span id="line-75">import org.apache.hbase.thirdparty.com.google.protobuf.UnsafeByteOperations;</span> |
| <span class="source-line-no">076</span><span id="line-76"></span> |
| <span class="source-line-no">077</span><span id="line-77">import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;</span> |
| <span class="source-line-no">078</span><span id="line-78">import org.apache.hadoop.hbase.shaded.protobuf.RequestConverter;</span> |
| <span class="source-line-no">079</span><span id="line-79">import org.apache.hadoop.hbase.shaded.protobuf.generated.ClusterStatusProtos.RegionStoreSequenceIds;</span> |
| <span class="source-line-no">080</span><span id="line-80">import org.apache.hadoop.hbase.shaded.protobuf.generated.ClusterStatusProtos.StoreSequenceId;</span> |
| <span class="source-line-no">081</span><span id="line-81">import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.FlushedRegionSequenceId;</span> |
| <span class="source-line-no">082</span><span id="line-82">import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.FlushedSequenceId;</span> |
| <span class="source-line-no">083</span><span id="line-83">import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.FlushedStoreSequenceId;</span> |
| <span class="source-line-no">084</span><span id="line-84">import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionServerStartupRequest;</span> |
| <span class="source-line-no">085</span><span id="line-85"></span> |
| <span class="source-line-no">086</span><span id="line-86">/**</span> |
| <span class="source-line-no">087</span><span id="line-87"> * The ServerManager class manages info about region servers.</span> |
| <span class="source-line-no">088</span><span id="line-88"> * &lt;p&gt;</span> |
| <span class="source-line-no">089</span><span id="line-89"> * Maintains lists of online and dead servers. Processes the startups, shutdowns, and deaths of</span> |
| <span class="source-line-no">090</span><span id="line-90"> * region servers.</span> |
| <span class="source-line-no">091</span><span id="line-91"> * &lt;p&gt;</span> |
| <span class="source-line-no">092</span><span id="line-92"> * Servers are distinguished in two different ways. A given server has a location, specified by</span> |
| <span class="source-line-no">093</span><span id="line-93"> * hostname and port, and of which there can only be one online at any given time. A server instance</span> |
| <span class="source-line-no">094</span><span id="line-94"> * is specified by the location (hostname and port) as well as the startcode (timestamp from when</span> |
| <span class="source-line-no">095</span><span id="line-95"> * the server was started). This is used to differentiate a restarted instance of a given server</span> |
| <span class="source-line-no">096</span><span id="line-96"> * from the original instance.</span> |
| <span class="source-line-no">097</span><span id="line-97"> * &lt;p&gt;</span> |
| <span class="source-line-no">098</span><span id="line-98"> * If a server is known not to be running any more, it is called dead. The dead server needs to be</span> |
| <span class="source-line-no">099</span><span id="line-99"> * handled by a ServerShutdownHandler. If the handler is not enabled yet, the server can't be</span> |
| <span class="source-line-no">100</span><span id="line-100"> * handled right away so it is queued up. After the handler is enabled, the server will be submitted</span> |
| <span class="source-line-no">101</span><span id="line-101"> * to a handler to handle. However, the handler may be just partially enabled. If so, the server</span> |
| <span class="source-line-no">102</span><span id="line-102"> * cannot be fully processed, and be queued up for further processing. A server is fully processed</span> |
| <span class="source-line-no">103</span><span id="line-103"> * only after the handler is fully enabled and has completed the handling.</span> |
| <span class="source-line-no">104</span><span id="line-104"> */</span> |
| <span class="source-line-no">105</span><span id="line-105">@InterfaceAudience.Private</span> |
| <span class="source-line-no">106</span><span id="line-106">public class ServerManager implements ConfigurationObserver {</span> |
| <span class="source-line-no">107</span><span id="line-107"> public static final String WAIT_ON_REGIONSERVERS_MAXTOSTART =</span> |
| <span class="source-line-no">108</span><span id="line-108"> "hbase.master.wait.on.regionservers.maxtostart";</span> |
| <span class="source-line-no">109</span><span id="line-109"></span> |
| <span class="source-line-no">110</span><span id="line-110"> public static final String WAIT_ON_REGIONSERVERS_MINTOSTART =</span> |
| <span class="source-line-no">111</span><span id="line-111"> "hbase.master.wait.on.regionservers.mintostart";</span> |
| <span class="source-line-no">112</span><span id="line-112"></span> |
| <span class="source-line-no">113</span><span id="line-113"> public static final String WAIT_ON_REGIONSERVERS_TIMEOUT =</span> |
| <span class="source-line-no">114</span><span id="line-114"> "hbase.master.wait.on.regionservers.timeout";</span> |
| <span class="source-line-no">115</span><span id="line-115"></span> |
| <span class="source-line-no">116</span><span id="line-116"> public static final String WAIT_ON_REGIONSERVERS_INTERVAL =</span> |
| <span class="source-line-no">117</span><span id="line-117"> "hbase.master.wait.on.regionservers.interval";</span> |
| <span class="source-line-no">118</span><span id="line-118"></span> |
| <span class="source-line-no">119</span><span id="line-119"> /**</span> |
| <span class="source-line-no">120</span><span id="line-120"> * see HBASE-20727 if set to true, flushedSequenceIdByRegion and storeFlushedSequenceIdsByRegion</span> |
| <span class="source-line-no">121</span><span id="line-121"> * will be persisted to HDFS and loaded when master restart to speed up log split</span> |
| <span class="source-line-no">122</span><span id="line-122"> */</span> |
| <span class="source-line-no">123</span><span id="line-123"> public static final String PERSIST_FLUSHEDSEQUENCEID =</span> |
| <span class="source-line-no">124</span><span id="line-124"> "hbase.master.persist.flushedsequenceid.enabled";</span> |
| <span class="source-line-no">125</span><span id="line-125"></span> |
| <span class="source-line-no">126</span><span id="line-126"> public static final boolean PERSIST_FLUSHEDSEQUENCEID_DEFAULT = true;</span> |
| <span class="source-line-no">127</span><span id="line-127"></span> |
| <span class="source-line-no">128</span><span id="line-128"> public static final String FLUSHEDSEQUENCEID_FLUSHER_INTERVAL =</span> |
| <span class="source-line-no">129</span><span id="line-129"> "hbase.master.flushedsequenceid.flusher.interval";</span> |
| <span class="source-line-no">130</span><span id="line-130"></span> |
| <span class="source-line-no">131</span><span id="line-131"> public static final int FLUSHEDSEQUENCEID_FLUSHER_INTERVAL_DEFAULT = 3 * 60 * 60 * 1000; // 3</span> |
| <span class="source-line-no">132</span><span id="line-132"> // hours</span> |
| <span class="source-line-no">133</span><span id="line-133"></span> |
| <span class="source-line-no">134</span><span id="line-134"> public static final String MAX_CLOCK_SKEW_MS = "hbase.master.maxclockskew";</span> |
| <span class="source-line-no">135</span><span id="line-135"></span> |
| <span class="source-line-no">136</span><span id="line-136"> private static final Logger LOG = LoggerFactory.getLogger(ServerManager.class);</span> |
| <span class="source-line-no">137</span><span id="line-137"></span> |
| <span class="source-line-no">138</span><span id="line-138"> // Set if we are to shutdown the cluster.</span> |
| <span class="source-line-no">139</span><span id="line-139"> private AtomicBoolean clusterShutdown = new AtomicBoolean(false);</span> |
| <span class="source-line-no">140</span><span id="line-140"></span> |
| <span class="source-line-no">141</span><span id="line-141"> /**</span> |
| <span class="source-line-no">142</span><span id="line-142"> * The last flushed sequence id for a region.</span> |
| <span class="source-line-no">143</span><span id="line-143"> */</span> |
| <span class="source-line-no">144</span><span id="line-144"> private final ConcurrentNavigableMap&lt;byte[], Long&gt; flushedSequenceIdByRegion =</span> |
| <span class="source-line-no">145</span><span id="line-145"> new ConcurrentSkipListMap&lt;&gt;(Bytes.BYTES_COMPARATOR);</span> |
| <span class="source-line-no">146</span><span id="line-146"></span> |
| <span class="source-line-no">147</span><span id="line-147"> private boolean persistFlushedSequenceId = true;</span> |
| <span class="source-line-no">148</span><span id="line-148"> private volatile boolean isFlushSeqIdPersistInProgress = false;</span> |
| <span class="source-line-no">149</span><span id="line-149"> /** File on hdfs to store last flushed sequence id of regions */</span> |
| <span class="source-line-no">150</span><span id="line-150"> private static final String LAST_FLUSHED_SEQ_ID_FILE = ".lastflushedseqids";</span> |
| <span class="source-line-no">151</span><span id="line-151"> private FlushedSequenceIdFlusher flushedSeqIdFlusher;</span> |
| <span class="source-line-no">152</span><span id="line-152"></span> |
| <span class="source-line-no">153</span><span id="line-153"> /**</span> |
| <span class="source-line-no">154</span><span id="line-154"> * The last flushed sequence id for a store in a region.</span> |
| <span class="source-line-no">155</span><span id="line-155"> */</span> |
| <span class="source-line-no">156</span><span id="line-156"> private final ConcurrentNavigableMap&lt;byte[],</span> |
| <span class="source-line-no">157</span><span id="line-157"> ConcurrentNavigableMap&lt;byte[], Long&gt;&gt; storeFlushedSequenceIdsByRegion =</span> |
| <span class="source-line-no">158</span><span id="line-158"> new ConcurrentSkipListMap&lt;&gt;(Bytes.BYTES_COMPARATOR);</span> |
| <span class="source-line-no">159</span><span id="line-159"></span> |
| <span class="source-line-no">160</span><span id="line-160"> /** Map of registered servers to their current load */</span> |
| <span class="source-line-no">161</span><span id="line-161"> private final ConcurrentNavigableMap&lt;ServerName, ServerMetrics&gt; onlineServers =</span> |
| <span class="source-line-no">162</span><span id="line-162"> new ConcurrentSkipListMap&lt;&gt;();</span> |
| <span class="source-line-no">163</span><span id="line-163"></span> |
| <span class="source-line-no">164</span><span id="line-164"> /** List of region servers that should not get any more new regions. */</span> |
| <span class="source-line-no">165</span><span id="line-165"> private final ArrayList&lt;ServerName&gt; drainingServers = new ArrayList&lt;&gt;();</span> |
| <span class="source-line-no">166</span><span id="line-166"></span> |
| <span class="source-line-no">167</span><span id="line-167"> private final MasterServices master;</span> |
| <span class="source-line-no">168</span><span id="line-168"> private final RegionServerList storage;</span> |
| <span class="source-line-no">169</span><span id="line-169"></span> |
| <span class="source-line-no">170</span><span id="line-170"> private final DeadServer deadservers = new DeadServer();</span> |
| <span class="source-line-no">171</span><span id="line-171"></span> |
| <span class="source-line-no">172</span><span id="line-172"> private final long maxSkew;</span> |
| <span class="source-line-no">173</span><span id="line-173"> private final long warningSkew;</span> |
| <span class="source-line-no">174</span><span id="line-174"></span> |
| <span class="source-line-no">175</span><span id="line-175"> /** Listeners that are called on server events. */</span> |
| <span class="source-line-no">176</span><span id="line-176"> private List&lt;ServerListener&gt; listeners = new CopyOnWriteArrayList&lt;&gt;();</span> |
| <span class="source-line-no">177</span><span id="line-177"></span> |
| <span class="source-line-no">178</span><span id="line-178"> /** Configured value of HConstants.REJECT_DECOMMISSIONED_HOSTS_KEY */</span> |
| <span class="source-line-no">179</span><span id="line-179"> private volatile boolean rejectDecommissionedHostsConfig;</span> |
| <span class="source-line-no">180</span><span id="line-180"></span> |
| <span class="source-line-no">181</span><span id="line-181"> /**</span> |
| <span class="source-line-no">182</span><span id="line-182"> * Constructor.</span> |
| <span class="source-line-no">183</span><span id="line-183"> */</span> |
| <span class="source-line-no">184</span><span id="line-184"> public ServerManager(final MasterServices master, RegionServerList storage) {</span> |
| <span class="source-line-no">185</span><span id="line-185"> this.master = master;</span> |
| <span class="source-line-no">186</span><span id="line-186"> this.storage = storage;</span> |
| <span class="source-line-no">187</span><span id="line-187"> Configuration c = master.getConfiguration();</span> |
| <span class="source-line-no">188</span><span id="line-188"> maxSkew = c.getLong(MAX_CLOCK_SKEW_MS, 30000);</span> |
| <span class="source-line-no">189</span><span id="line-189"> warningSkew = c.getLong("hbase.master.warningclockskew", 10000);</span> |
| <span class="source-line-no">190</span><span id="line-190"> persistFlushedSequenceId =</span> |
| <span class="source-line-no">191</span><span id="line-191"> c.getBoolean(PERSIST_FLUSHEDSEQUENCEID, PERSIST_FLUSHEDSEQUENCEID_DEFAULT);</span> |
| <span class="source-line-no">192</span><span id="line-192"> rejectDecommissionedHostsConfig = getRejectDecommissionedHostsConfig(c);</span> |
| <span class="source-line-no">193</span><span id="line-193"> }</span> |
| <span class="source-line-no">194</span><span id="line-194"></span> |
| <span class="source-line-no">195</span><span id="line-195"> /**</span> |
| <span class="source-line-no">196</span><span id="line-196"> * Implementation of the ConfigurationObserver interface. We are interested in live-loading the</span> |
| <span class="source-line-no">197</span><span id="line-197"> * configuration value of HConstants.REJECT_DECOMMISSIONED_HOSTS_KEY</span> |
| <span class="source-line-no">198</span><span id="line-198"> * @param conf Server configuration instance</span> |
| <span class="source-line-no">199</span><span id="line-199"> */</span> |
| <span class="source-line-no">200</span><span id="line-200"> @Override</span> |
| <span class="source-line-no">201</span><span id="line-201"> public void onConfigurationChange(Configuration conf) {</span> |
| <span class="source-line-no">202</span><span id="line-202"> final boolean newValue = getRejectDecommissionedHostsConfig(conf);</span> |
| <span class="source-line-no">203</span><span id="line-203"> if (rejectDecommissionedHostsConfig == newValue) {</span> |
| <span class="source-line-no">204</span><span id="line-204"> // no-op</span> |
| <span class="source-line-no">205</span><span id="line-205"> return;</span> |
| <span class="source-line-no">206</span><span id="line-206"> }</span> |
| <span class="source-line-no">207</span><span id="line-207"></span> |
| <span class="source-line-no">208</span><span id="line-208"> LOG.info("Config Reload for RejectDecommissionedHosts. previous value: {}, new value: {}",</span> |
| <span class="source-line-no">209</span><span id="line-209"> rejectDecommissionedHostsConfig, newValue);</span> |
| <span class="source-line-no">210</span><span id="line-210"></span> |
| <span class="source-line-no">211</span><span id="line-211"> rejectDecommissionedHostsConfig = newValue;</span> |
| <span class="source-line-no">212</span><span id="line-212"> }</span> |
| <span class="source-line-no">213</span><span id="line-213"></span> |
| <span class="source-line-no">214</span><span id="line-214"> /**</span> |
| <span class="source-line-no">215</span><span id="line-215"> * Reads the value of HConstants.REJECT_DECOMMISSIONED_HOSTS_KEY from the config and returns it</span> |
| <span class="source-line-no">216</span><span id="line-216"> * @param conf Configuration instance of the Master</span> |
| <span class="source-line-no">217</span><span id="line-217"> */</span> |
| <span class="source-line-no">218</span><span id="line-218"> public boolean getRejectDecommissionedHostsConfig(Configuration conf) {</span> |
| <span class="source-line-no">219</span><span id="line-219"> return conf.getBoolean(HConstants.REJECT_DECOMMISSIONED_HOSTS_KEY,</span> |
| <span class="source-line-no">220</span><span id="line-220"> HConstants.REJECT_DECOMMISSIONED_HOSTS_DEFAULT);</span> |
| <span class="source-line-no">221</span><span id="line-221"> }</span> |
| <span class="source-line-no">222</span><span id="line-222"></span> |
| <span class="source-line-no">223</span><span id="line-223"> /**</span> |
| <span class="source-line-no">224</span><span id="line-224"> * Add the listener to the notification list.</span> |
| <span class="source-line-no">225</span><span id="line-225"> * @param listener The ServerListener to register</span> |
| <span class="source-line-no">226</span><span id="line-226"> */</span> |
| <span class="source-line-no">227</span><span id="line-227"> public void registerListener(final ServerListener listener) {</span> |
| <span class="source-line-no">228</span><span id="line-228"> this.listeners.add(listener);</span> |
| <span class="source-line-no">229</span><span id="line-229"> }</span> |
| <span class="source-line-no">230</span><span id="line-230"></span> |
| <span class="source-line-no">231</span><span id="line-231"> /**</span> |
| <span class="source-line-no">232</span><span id="line-232"> * Remove the listener from the notification list.</span> |
| <span class="source-line-no">233</span><span id="line-233"> * @param listener The ServerListener to unregister</span> |
| <span class="source-line-no">234</span><span id="line-234"> */</span> |
| <span class="source-line-no">235</span><span id="line-235"> public boolean unregisterListener(final ServerListener listener) {</span> |
| <span class="source-line-no">236</span><span id="line-236"> return this.listeners.remove(listener);</span> |
| <span class="source-line-no">237</span><span id="line-237"> }</span> |
| <span class="source-line-no">238</span><span id="line-238"></span> |
| <span class="source-line-no">239</span><span id="line-239"> /**</span> |
| <span class="source-line-no">240</span><span id="line-240"> * Removes all of the ServerListeners of this collection that satisfy the given predicate.</span> |
| <span class="source-line-no">241</span><span id="line-241"> * @param filter a predicate which returns true for ServerListener to be removed</span> |
| <span class="source-line-no">242</span><span id="line-242"> */</span> |
| <span class="source-line-no">243</span><span id="line-243"> public boolean unregisterListenerIf(final Predicate&lt;ServerListener&gt; filter) {</span> |
| <span class="source-line-no">244</span><span id="line-244"> return this.listeners.removeIf(filter);</span> |
| <span class="source-line-no">245</span><span id="line-245"> }</span> |
| <span class="source-line-no">246</span><span id="line-246"></span> |
| <span class="source-line-no">247</span><span id="line-247"> /**</span> |
| <span class="source-line-no">248</span><span id="line-248"> * Let the server manager know a new regionserver has come online</span> |
| <span class="source-line-no">249</span><span id="line-249"> * @param request the startup request</span> |
| <span class="source-line-no">250</span><span id="line-250"> * @param versionNumber the version number of the new regionserver</span> |
| <span class="source-line-no">251</span><span id="line-251"> * @param version the version of the new regionserver, could contain strings like "SNAPSHOT"</span> |
| <span class="source-line-no">252</span><span id="line-252"> * @param ia the InetAddress from which request is received</span> |
| <span class="source-line-no">253</span><span id="line-253"> * @return The ServerName we know this server as.</span> |
| <span class="source-line-no">254</span><span id="line-254"> */</span> |
| <span class="source-line-no">255</span><span id="line-255"> ServerName regionServerStartup(RegionServerStartupRequest request, int versionNumber,</span> |
| <span class="source-line-no">256</span><span id="line-256"> String version, InetAddress ia) throws IOException {</span> |
| <span class="source-line-no">257</span><span id="line-257"> // Test for case where we get a region startup message from a regionserver</span> |
| <span class="source-line-no">258</span><span id="line-258"> // that has been quickly restarted but whose znode expiration handler has</span> |
| <span class="source-line-no">259</span><span id="line-259"> // not yet run, or from a server whose fail we are currently processing.</span> |
| <span class="source-line-no">260</span><span id="line-260"> // Test its host+port combo is present in serverAddressToServerInfo. If it</span> |
| <span class="source-line-no">261</span><span id="line-261"> // is, reject the server and trigger its expiration. The next time it comes</span> |
| <span class="source-line-no">262</span><span id="line-262"> // in, it should have been removed from serverAddressToServerInfo and queued</span> |
| <span class="source-line-no">263</span><span id="line-263"> // for processing by ProcessServerShutdown.</span> |
| <span class="source-line-no">264</span><span id="line-264"></span> |
| <span class="source-line-no">265</span><span id="line-265"> // if use-ip is enabled, we will use ip to expose Master/RS service for client,</span> |
| <span class="source-line-no">266</span><span id="line-266"> // see HBASE-27304 for details.</span> |
| <span class="source-line-no">267</span><span id="line-267"> boolean useIp = master.getConfiguration().getBoolean(HConstants.HBASE_SERVER_USEIP_ENABLED_KEY,</span> |
| <span class="source-line-no">268</span><span id="line-268"> HConstants.HBASE_SERVER_USEIP_ENABLED_DEFAULT);</span> |
| <span class="source-line-no">269</span><span id="line-269"> String isaHostName = useIp ? ia.getHostAddress() : ia.getHostName();</span> |
| <span class="source-line-no">270</span><span id="line-270"> final String hostname =</span> |
| <span class="source-line-no">271</span><span id="line-271"> request.hasUseThisHostnameInstead() ? request.getUseThisHostnameInstead() : isaHostName;</span> |
| <span class="source-line-no">272</span><span id="line-272"> ServerName sn = ServerName.valueOf(hostname, request.getPort(), request.getServerStartCode());</span> |
| <span class="source-line-no">273</span><span id="line-273"></span> |
| <span class="source-line-no">274</span><span id="line-274"> // Check if the host should be rejected based on its decommissioned status</span> |
| <span class="source-line-no">275</span><span id="line-275"> checkRejectableDecommissionedStatus(sn);</span> |
| <span class="source-line-no">276</span><span id="line-276"></span> |
| <span class="source-line-no">277</span><span id="line-277"> checkClockSkew(sn, request.getServerCurrentTime());</span> |
| <span class="source-line-no">278</span><span id="line-278"> checkIsDead(sn, "STARTUP");</span> |
| <span class="source-line-no">279</span><span id="line-279"> if (!checkAndRecordNewServer(sn, ServerMetricsBuilder.of(sn, versionNumber, version))) {</span> |
| <span class="source-line-no">280</span><span id="line-280"> LOG.warn("THIS SHOULD NOT HAPPEN, RegionServerStartup could not record the server: {}", sn);</span> |
| <span class="source-line-no">281</span><span id="line-281"> }</span> |
| <span class="source-line-no">282</span><span id="line-282"> storage.started(sn);</span> |
| <span class="source-line-no">283</span><span id="line-283"> return sn;</span> |
| <span class="source-line-no">284</span><span id="line-284"> }</span> |
| <span class="source-line-no">285</span><span id="line-285"></span> |
| <span class="source-line-no">286</span><span id="line-286"> /**</span> |
| <span class="source-line-no">287</span><span id="line-287"> * Updates last flushed sequence Ids for the regions on server sn</span> |
| <span class="source-line-no">288</span><span id="line-288"> */</span> |
| <span class="source-line-no">289</span><span id="line-289"> private void updateLastFlushedSequenceIds(ServerName sn, ServerMetrics hsl) {</span> |
| <span class="source-line-no">290</span><span id="line-290"> for (Entry<byte[], RegionMetrics> entry : hsl.getRegionMetrics().entrySet()) {</span> |
| <span class="source-line-no">291</span><span id="line-291"> byte[] encodedRegionName = Bytes.toBytes(RegionInfo.encodeRegionName(entry.getKey()));</span> |
| <span class="source-line-no">292</span><span id="line-292"> Long existingValue = flushedSequenceIdByRegion.get(encodedRegionName);</span> |
| <span class="source-line-no">293</span><span id="line-293"> long l = entry.getValue().getCompletedSequenceId();</span> |
| <span class="source-line-no">294</span><span id="line-294"> // Don't let smaller sequence ids override greater sequence ids.</span> |
| <span class="source-line-no">295</span><span id="line-295"> if (LOG.isTraceEnabled()) {</span> |
| <span class="source-line-no">296</span><span id="line-296"> LOG.trace(Bytes.toString(encodedRegionName) + ", existingValue=" + existingValue</span> |
| <span class="source-line-no">297</span><span id="line-297"> + ", completeSequenceId=" + l);</span> |
| <span class="source-line-no">298</span><span id="line-298"> }</span> |
| <span class="source-line-no">299</span><span id="line-299"> if (existingValue == null || (l != HConstants.NO_SEQNUM && l > existingValue)) {</span> |
| <span class="source-line-no">300</span><span id="line-300"> flushedSequenceIdByRegion.put(encodedRegionName, l);</span> |
| <span class="source-line-no">301</span><span id="line-301"> } else if (l != HConstants.NO_SEQNUM && l < existingValue) {</span> |
| <span class="source-line-no">302</span><span id="line-302"> LOG.warn("RegionServer " + sn + " indicates a last flushed sequence id (" + l</span> |
| <span class="source-line-no">303</span><span id="line-303"> + ") that is less than the previous last flushed sequence id (" + existingValue</span> |
| <span class="source-line-no">304</span><span id="line-304"> + ") for region " + Bytes.toString(entry.getKey()) + " Ignoring.");</span> |
| <span class="source-line-no">305</span><span id="line-305"> }</span> |
| <span class="source-line-no">306</span><span id="line-306"> ConcurrentNavigableMap<byte[], Long> storeFlushedSequenceId =</span> |
| <span class="source-line-no">307</span><span id="line-307"> computeIfAbsent(storeFlushedSequenceIdsByRegion, encodedRegionName,</span> |
| <span class="source-line-no">308</span><span id="line-308"> () -> new ConcurrentSkipListMap<>(Bytes.BYTES_COMPARATOR));</span> |
| <span class="source-line-no">309</span><span id="line-309"> for (Entry<byte[], Long> storeSeqId : entry.getValue().getStoreSequenceId().entrySet()) {</span> |
| <span class="source-line-no">310</span><span id="line-310"> byte[] family = storeSeqId.getKey();</span> |
| <span class="source-line-no">311</span><span id="line-311"> existingValue = storeFlushedSequenceId.get(family);</span> |
| <span class="source-line-no">312</span><span id="line-312"> l = storeSeqId.getValue();</span> |
| <span class="source-line-no">313</span><span id="line-313"> if (LOG.isTraceEnabled()) {</span> |
| <span class="source-line-no">314</span><span id="line-314"> LOG.trace(Bytes.toString(encodedRegionName) + ", family=" + Bytes.toString(family)</span> |
| <span class="source-line-no">315</span><span id="line-315"> + ", existingValue=" + existingValue + ", completeSequenceId=" + l);</span> |
| <span class="source-line-no">316</span><span id="line-316"> }</span> |
| <span class="source-line-no">317</span><span id="line-317"> // Don't let smaller sequence ids override greater sequence ids.</span> |
| <span class="source-line-no">318</span><span id="line-318"> if (existingValue == null || (l != HConstants.NO_SEQNUM && l > existingValue.longValue())) {</span> |
| <span class="source-line-no">319</span><span id="line-319"> storeFlushedSequenceId.put(family, l);</span> |
| <span class="source-line-no">320</span><span id="line-320"> }</span> |
| <span class="source-line-no">321</span><span id="line-321"> }</span> |
| <span class="source-line-no">322</span><span id="line-322"> }</span> |
| <span class="source-line-no">323</span><span id="line-323"> }</span> |
| <span class="source-line-no">324</span><span id="line-324"></span> |
| <span class="source-line-no">325</span><span id="line-325"> public void regionServerReport(ServerName sn, ServerMetrics sl) throws YouAreDeadException {</span> |
| <span class="source-line-no">326</span><span id="line-326"> checkIsDead(sn, "REPORT");</span> |
| <span class="source-line-no">327</span><span id="line-327"> if (null == this.onlineServers.replace(sn, sl)) {</span> |
| <span class="source-line-no">328</span><span id="line-328"> // Already have this host+port combo and it's just a different start code?</span> |
| <span class="source-line-no">329</span><span id="line-329"> // Just let the server in. Presume master joining a running cluster.</span> |
| <span class="source-line-no">330</span><span id="line-330"> // checkAndRecordNewServer is what happens at the end of regionServerStartup.</span> |
| <span class="source-line-no">331</span><span id="line-331"> // The only thing we are skipping is passing back to the regionserver</span> |
| <span class="source-line-no">332</span><span id="line-332"> // the ServerName to use. Here we presume a master has already done</span> |
| <span class="source-line-no">333</span><span id="line-333"> // that so we'll press on with whatever it gave us for ServerName.</span> |
| <span class="source-line-no">334</span><span id="line-334"> if (!checkAndRecordNewServer(sn, sl)) {</span> |
| <span class="source-line-no">335</span><span id="line-335"> // Master already registered server with same (host + port) and higher startcode.</span> |
| <span class="source-line-no">336</span><span id="line-336"> // This can happen if regionserver report comes late from old server</span> |
| <span class="source-line-no">337</span><span id="line-337"> // (possible race condition), by that time master has already processed SCP for that</span> |
| <span class="source-line-no">338</span><span id="line-338"> // server and started accepting regionserver report from new server i.e. server with</span> |
| <span class="source-line-no">339</span><span id="line-339"> // same (host + port) and higher startcode.</span> |
| <span class="source-line-no">340</span><span id="line-340"> // The exception thrown here is not meant to tell the region server it is dead because if</span> |
| <span class="source-line-no">341</span><span id="line-341"> // there is a new server on the same host port, the old server should have already been</span> |
| <span class="source-line-no">342</span><span id="line-342"> // dead in ideal situation.</span> |
| <span class="source-line-no">343</span><span id="line-343"> // The exception thrown here is to skip the later steps of the whole regionServerReport</span> |
| <span class="source-line-no">344</span><span id="line-344"> // request processing. Usually, after recording it in ServerManager, we will call the</span> |
| <span class="source-line-no">345</span><span id="line-345"> // related methods in AssignmentManager to record region states. If the region server</span> |
| <span class="source-line-no">346</span><span id="line-346"> // is already dead, we should not do these steps anymore, so here we throw an exception</span> |
| <span class="source-line-no">347</span><span id="line-347"> // to let the upper layer know that they should not continue processing anymore.</span> |
| <span class="source-line-no">348</span><span id="line-348"> final String errorMsg = "RegionServerReport received from " + sn</span> |
| <span class="source-line-no">349</span><span id="line-349"> + ", but another server with the same name and higher startcode is already registered,"</span> |
| <span class="source-line-no">350</span><span id="line-350"> + " ignoring";</span> |
| <span class="source-line-no">351</span><span id="line-351"> LOG.warn(errorMsg);</span> |
| <span class="source-line-no">352</span><span id="line-352"> throw new YouAreDeadException(errorMsg);</span> |
| <span class="source-line-no">353</span><span id="line-353"> }</span> |
| <span class="source-line-no">354</span><span id="line-354"> }</span> |
| <span class="source-line-no">355</span><span id="line-355"> updateLastFlushedSequenceIds(sn, sl);</span> |
| <span class="source-line-no">356</span><span id="line-356"> }</span> |
| <span class="source-line-no">357</span><span id="line-357"></span> |
| <span class="source-line-no">358</span><span id="line-358"> /**</span> |
| <span class="source-line-no">359</span><span id="line-359"> * Checks if the Master is configured to reject decommissioned hosts or not. When it's configured</span> |
| <span class="source-line-no">360</span><span id="line-360"> * to do so, any RegionServer trying to join the cluster will have its host checked against the</span> |
| <span class="source-line-no">361</span><span id="line-361"> * list of hosts of currently decommissioned servers and potentially get prevented from reporting</span> |
| <span class="source-line-no">362</span><span id="line-362"> * for duty; otherwise, we do nothing and we let them pass to the next check. See HBASE-28342 for</span> |
| <span class="source-line-no">363</span><span id="line-363"> * details.</span> |
| <span class="source-line-no">364</span><span id="line-364"> * @param sn The ServerName to check for</span> |
| <span class="source-line-no">365</span><span id="line-365"> * @throws DecommissionedHostRejectedException if the Master is configured to reject</span> |
| <span class="source-line-no">366</span><span id="line-366"> * decommissioned hosts and this host exists in the</span> |
| <span class="source-line-no">367</span><span id="line-367"> * list of the decommissioned servers</span> |
| <span class="source-line-no">368</span><span id="line-368"> */</span> |
| <span class="source-line-no">369</span><span id="line-369"> private void checkRejectableDecommissionedStatus(ServerName sn)</span> |
| <span class="source-line-no">370</span><span id="line-370"> throws DecommissionedHostRejectedException {</span> |
| <span class="source-line-no">371</span><span id="line-371"> LOG.info("Checking decommissioned status of RegionServer {}", sn.getServerName());</span> |
| <span class="source-line-no">372</span><span id="line-372"></span> |
| <span class="source-line-no">373</span><span id="line-373"> // If the Master is not configured to reject decommissioned hosts, return early.</span> |
| <span class="source-line-no">374</span><span id="line-374"> if (!rejectDecommissionedHostsConfig) {</span> |
| <span class="source-line-no">375</span><span id="line-375"> return;</span> |
| <span class="source-line-no">376</span><span id="line-376"> }</span> |
| <span class="source-line-no">377</span><span id="line-377"></span> |
| <span class="source-line-no">378</span><span id="line-378"> // Look for a match for the hostname in the list of decommissioned servers</span> |
| <span class="source-line-no">379</span><span id="line-379"> for (ServerName server : getDrainingServersList()) {</span> |
| <span class="source-line-no">380</span><span id="line-380"> if (Objects.equals(server.getHostname(), sn.getHostname())) {</span> |
| <span class="source-line-no">381</span><span id="line-381"> // Found a match and master is configured to reject decommissioned hosts, throw exception!</span> |
| <span class="source-line-no">382</span><span id="line-382"> LOG.warn(</span> |
| <span class="source-line-no">383</span><span id="line-383"> "Rejecting RegionServer {} from reporting for duty because Master is configured "</span> |
| <span class="source-line-no">384</span><span id="line-384"> + "to reject decommissioned hosts and this host was marked as such in the past.",</span> |
| <span class="source-line-no">385</span><span id="line-385"> sn.getServerName());</span> |
| <span class="source-line-no">386</span><span id="line-386"> throw new DecommissionedHostRejectedException(String.format(</span> |
| <span class="source-line-no">387</span><span id="line-387"> "Host %s exists in the list of decommissioned servers and Master is configured to "</span> |
| <span class="source-line-no">388</span><span id="line-388"> + "reject decommissioned hosts",</span> |
| <span class="source-line-no">389</span><span id="line-389"> sn.getHostname()));</span> |
| <span class="source-line-no">390</span><span id="line-390"> }</span> |
| <span class="source-line-no">391</span><span id="line-391"> }</span> |
| <span class="source-line-no">392</span><span id="line-392"> }</span> |
| <span class="source-line-no">393</span><span id="line-393"></span> |
| <span class="source-line-no">394</span><span id="line-394"> /**</span> |
| <span class="source-line-no">395</span><span id="line-395"> * Checks whether a server with the same host and port already exists; if not, or if the</span> |
| <span class="source-line-no">396</span><span id="line-396"> * existing one has a smaller start code, records it.</span> |
| <span class="source-line-no">397</span><span id="line-397"> * @param serverName the server to check and record</span> |
| <span class="source-line-no">398</span><span id="line-398"> * @param sl the server load on the server</span> |
| <span class="source-line-no">399</span><span id="line-399"> * @return true if the server is recorded, otherwise, false</span> |
| <span class="source-line-no">400</span><span id="line-400"> */</span> |
| <span class="source-line-no">401</span><span id="line-401"> boolean checkAndRecordNewServer(final ServerName serverName, final ServerMetrics sl) {</span> |
| <span class="source-line-no">402</span><span id="line-402"> ServerName existingServer = null;</span> |
| <span class="source-line-no">403</span><span id="line-403"> synchronized (this.onlineServers) {</span> |
| <span class="source-line-no">404</span><span id="line-404"> existingServer = findServerWithSameHostnamePortWithLock(serverName);</span> |
| <span class="source-line-no">405</span><span id="line-405"> if (existingServer != null && (existingServer.getStartcode() > serverName.getStartcode())) {</span> |
| <span class="source-line-no">406</span><span id="line-406"> LOG.info("Server serverName=" + serverName + " rejected; we already have "</span> |
| <span class="source-line-no">407</span><span id="line-407"> + existingServer.toString() + " registered with same hostname and port");</span> |
| <span class="source-line-no">408</span><span id="line-408"> return false;</span> |
| <span class="source-line-no">409</span><span id="line-409"> }</span> |
| <span class="source-line-no">410</span><span id="line-410"> recordNewServerWithLock(serverName, sl);</span> |
| <span class="source-line-no">411</span><span id="line-411"> }</span> |
| <span class="source-line-no">412</span><span id="line-412"></span> |
| <span class="source-line-no">413</span><span id="line-413"> // Tell our listeners that a server was added</span> |
| <span class="source-line-no">414</span><span id="line-414"> if (!this.listeners.isEmpty()) {</span> |
| <span class="source-line-no">415</span><span id="line-415"> for (ServerListener listener : this.listeners) {</span> |
| <span class="source-line-no">416</span><span id="line-416"> listener.serverAdded(serverName);</span> |
| <span class="source-line-no">417</span><span id="line-417"> }</span> |
| <span class="source-line-no">418</span><span id="line-418"> }</span> |
| <span class="source-line-no">419</span><span id="line-419"></span> |
| <span class="source-line-no">420</span><span id="line-420"> // Note that we assume that same ts means same server, and don't expire in that case.</span> |
| <span class="source-line-no">421</span><span id="line-421"> // TODO: ts can theoretically collide due to clock shifts, so this is a bit hacky.</span> |
| <span class="source-line-no">422</span><span id="line-422"> if (existingServer != null && (existingServer.getStartcode() < serverName.getStartcode())) {</span> |
| <span class="source-line-no">423</span><span id="line-423"> LOG.info("Triggering server recovery; existingServer " + existingServer</span> |
| <span class="source-line-no">424</span><span id="line-424"> + " looks stale, new server:" + serverName);</span> |
| <span class="source-line-no">425</span><span id="line-425"> expireServer(existingServer);</span> |
| <span class="source-line-no">426</span><span id="line-426"> }</span> |
| <span class="source-line-no">427</span><span id="line-427"> return true;</span> |
| <span class="source-line-no">428</span><span id="line-428"> }</span> |
| <span class="source-line-no">429</span><span id="line-429"></span> |
| <span class="source-line-no">430</span><span id="line-430"> /**</span> |
| <span class="source-line-no">431</span><span id="line-431"> * Find out the region servers crashed between the crash of the previous master instance and the</span> |
| <span class="source-line-no">432</span><span id="line-432"> * current master instance and schedule SCP for them.</span> |
| <span class="source-line-no">433</span><span id="line-433"> * <p/></span> |
| <span class="source-line-no">434</span><span id="line-434"> * Since the {@code RegionServerTracker} has already helped us to construct the online servers set</span> |
| <span class="source-line-no">435</span><span id="line-435"> * by scanning zookeeper, now we can compare the online servers with {@code liveServersFromWALDir}</span> |
| <span class="source-line-no">436</span><span id="line-436"> * to find out whether there are servers which are already dead.</span> |
| <span class="source-line-no">437</span><span id="line-437"> * <p/></span> |
| <span class="source-line-no">438</span><span id="line-438"> * Must be called inside the initialization method of {@code RegionServerTracker} to avoid</span> |
| <span class="source-line-no">439</span><span id="line-439"> * concurrency issue.</span> |
| <span class="source-line-no">440</span><span id="line-440"> * @param deadServersFromPE the region servers which already have a SCP associated.</span> |
| <span class="source-line-no">441</span><span id="line-441"> * @param liveServersFromWALDir the live region servers from wal directory.</span> |
| <span class="source-line-no">442</span><span id="line-442"> */</span> |
| <span class="source-line-no">443</span><span id="line-443"> void findDeadServersAndProcess(Set<ServerName> deadServersFromPE,</span> |
| <span class="source-line-no">444</span><span id="line-444"> Set<ServerName> liveServersFromWALDir) {</span> |
| <span class="source-line-no">445</span><span id="line-445"> deadServersFromPE.forEach(deadservers::putIfAbsent);</span> |
| <span class="source-line-no">446</span><span id="line-446"> liveServersFromWALDir.stream().filter(sn -> !onlineServers.containsKey(sn))</span> |
| <span class="source-line-no">447</span><span id="line-447"> .forEach(this::expireServer);</span> |
| <span class="source-line-no">448</span><span id="line-448"> }</span> |
| <span class="source-line-no">449</span><span id="line-449"></span> |
| <span class="source-line-no">450</span><span id="line-450"> /**</span> |
| <span class="source-line-no">451</span><span id="line-451"> * Checks the clock skew between the server and the master. If the clock skew exceeds the</span> |
| <span class="source-line-no">452</span><span id="line-452"> * configured max, it will throw an exception; if it exceeds the configured warning threshold, it</span> |
| <span class="source-line-no">453</span><span id="line-453"> * will log a warning but start normally.</span> |
| <span class="source-line-no">454</span><span id="line-454"> * @param serverName Incoming server's name</span> |
| <span class="source-line-no">455</span><span id="line-455"> * @throws ClockOutOfSyncException if the skew exceeds the configured max value</span> |
| <span class="source-line-no">456</span><span id="line-456"> */</span> |
| <span class="source-line-no">457</span><span id="line-457"> private void checkClockSkew(final ServerName serverName, final long serverCurrentTime)</span> |
| <span class="source-line-no">458</span><span id="line-458"> throws ClockOutOfSyncException {</span> |
| <span class="source-line-no">459</span><span id="line-459"> long skew = Math.abs(EnvironmentEdgeManager.currentTime() - serverCurrentTime);</span> |
| <span class="source-line-no">460</span><span id="line-460"> if (skew > maxSkew) {</span> |
| <span class="source-line-no">461</span><span id="line-461"> String message = "Server " + serverName + " has been "</span> |
| <span class="source-line-no">462</span><span id="line-462"> + "rejected; Reported time is too far out of sync with master. " + "Time difference of "</span> |
| <span class="source-line-no">463</span><span id="line-463"> + skew + "ms > max allowed of " + maxSkew + "ms";</span> |
| <span class="source-line-no">464</span><span id="line-464"> LOG.warn(message);</span> |
| <span class="source-line-no">465</span><span id="line-465"> throw new ClockOutOfSyncException(message);</span> |
| <span class="source-line-no">466</span><span id="line-466"> } else if (skew > warningSkew) {</span> |
| <span class="source-line-no">467</span><span id="line-467"> String message = "Reported time for server " + serverName + " is out of sync with master "</span> |
| <span class="source-line-no">468</span><span id="line-468"> + "by " + skew + "ms. (Warning threshold is " + warningSkew + "ms; " + "error threshold is "</span> |
| <span class="source-line-no">469</span><span id="line-469"> + maxSkew + "ms)";</span> |
| <span class="source-line-no">470</span><span id="line-470"> LOG.warn(message);</span> |
| <span class="source-line-no">471</span><span id="line-471"> }</span> |
| <span class="source-line-no">472</span><span id="line-472"> }</span> |
| <span class="source-line-no">473</span><span id="line-473"></span> |
| <span class="source-line-no">474</span><span id="line-474"> /**</span> |
| <span class="source-line-no">475</span><span id="line-475"> * Called when RegionServer first reports in for duty and thereafter each time it heartbeats to</span> |
| <span class="source-line-no">476</span><span id="line-476"> * make sure it has not been marked as dead. If this server is on the dead list, reject it</span> |
| <span class="source-line-no">477</span><span id="line-477"> * with a YouAreDeadException. If it was dead but came back with a new start code, remove the old</span> |
| <span class="source-line-no">478</span><span id="line-478"> * entry from the dead list.</span> |
| <span class="source-line-no">479</span><span id="line-479"> * @param what STARTUP or REPORT</span> |
| <span class="source-line-no">480</span><span id="line-480"> */</span> |
| <span class="source-line-no">481</span><span id="line-481"> private void checkIsDead(final ServerName serverName, final String what)</span> |
| <span class="source-line-no">482</span><span id="line-482"> throws YouAreDeadException {</span> |
| <span class="source-line-no">483</span><span id="line-483"> if (this.deadservers.isDeadServer(serverName)) {</span> |
| <span class="source-line-no">484</span><span id="line-484"> // Exact match: host name, port and start code all match with existing one of the</span> |
| <span class="source-line-no">485</span><span id="line-485"> // dead servers. So, this server must be dead. Tell it to kill itself.</span> |
| <span class="source-line-no">486</span><span id="line-486"> String message =</span> |
| <span class="source-line-no">487</span><span id="line-487"> "Server " + what + " rejected; currently processing " + serverName + " as dead server";</span> |
| <span class="source-line-no">488</span><span id="line-488"> LOG.debug(message);</span> |
| <span class="source-line-no">489</span><span id="line-489"> throw new YouAreDeadException(message);</span> |
| <span class="source-line-no">490</span><span id="line-490"> }</span> |
| <span class="source-line-no">491</span><span id="line-491"> // Remove dead server with same hostname and port of newly checking in rs after master</span> |
| <span class="source-line-no">492</span><span id="line-492"> // initialization. See HBASE-5916 for more information.</span> |
| <span class="source-line-no">493</span><span id="line-493"> if (</span> |
| <span class="source-line-no">494</span><span id="line-494"> (this.master == null || this.master.isInitialized())</span> |
| <span class="source-line-no">495</span><span id="line-495"> && this.deadservers.cleanPreviousInstance(serverName)</span> |
| <span class="source-line-no">496</span><span id="line-496"> ) {</span> |
| <span class="source-line-no">497</span><span id="line-497"> // This server has now become alive after we marked it as dead.</span> |
| <span class="source-line-no">498</span><span id="line-498"> // We removed its previous entry from the dead list to reflect it.</span> |
| <span class="source-line-no">499</span><span id="line-499"> LOG.debug("{} {} came back up, removed it from the dead servers list", what, serverName);</span> |
| <span class="source-line-no">500</span><span id="line-500"> }</span> |
| <span class="source-line-no">501</span><span id="line-501"> }</span> |
| <span class="source-line-no">502</span><span id="line-502"></span> |
| <span class="source-line-no">503</span><span id="line-503"> /**</span> |
| <span class="source-line-no">504</span><span id="line-504"> * Assumes onlineServers is locked.</span> |
| <span class="source-line-no">505</span><span id="line-505"> * @return ServerName with matching hostname and port, or null if there is none.</span> |
| <span class="source-line-no">506</span><span id="line-506"> */</span> |
| <span class="source-line-no">507</span><span id="line-507"> public ServerName findServerWithSameHostnamePortWithLock(final ServerName serverName) {</span> |
| <span class="source-line-no">508</span><span id="line-508"> ServerName end =</span> |
| <span class="source-line-no">509</span><span id="line-509"> ServerName.valueOf(serverName.getHostname(), serverName.getPort(), Long.MAX_VALUE);</span> |
| <span class="source-line-no">510</span><span id="line-510"></span> |
| <span class="source-line-no">511</span><span id="line-511"> ServerName r = onlineServers.lowerKey(end);</span> |
| <span class="source-line-no">512</span><span id="line-512"> if (r != null) {</span> |
| <span class="source-line-no">513</span><span id="line-513"> if (ServerName.isSameAddress(r, serverName)) {</span> |
| <span class="source-line-no">514</span><span id="line-514"> return r;</span> |
| <span class="source-line-no">515</span><span id="line-515"> }</span> |
| <span class="source-line-no">516</span><span id="line-516"> }</span> |
| <span class="source-line-no">517</span><span id="line-517"> return null;</span> |
| <span class="source-line-no">518</span><span id="line-518"> }</span> |
| <span class="source-line-no">519</span><span id="line-519"></span> |
| <span class="source-line-no">520</span><span id="line-520"> /**</span> |
| <span class="source-line-no">521</span><span id="line-521"> * Adds the server to the onlineServers map. onlineServers should be locked.</span> |
| <span class="source-line-no">522</span><span id="line-522"> * @param serverName The remote server's name.</span> |
| <span class="source-line-no">523</span><span id="line-523"> */</span> |
| <span class="source-line-no">524</span><span id="line-524"> void recordNewServerWithLock(final ServerName serverName, final ServerMetrics sl) {</span> |
| <span class="source-line-no">525</span><span id="line-525"> LOG.info("Registering regionserver=" + serverName);</span> |
| <span class="source-line-no">526</span><span id="line-526"> this.onlineServers.put(serverName, sl);</span> |
| <span class="source-line-no">527</span><span id="line-527"> master.getAssignmentManager().getRegionStates().createServer(serverName);</span> |
| <span class="source-line-no">528</span><span id="line-528"> }</span> |
| <span class="source-line-no">529</span><span id="line-529"></span> |
| <span class="source-line-no">530</span><span id="line-530"> public ConcurrentNavigableMap<byte[], Long> getFlushedSequenceIdByRegion() {</span> |
| <span class="source-line-no">531</span><span id="line-531"> return flushedSequenceIdByRegion;</span> |
| <span class="source-line-no">532</span><span id="line-532"> }</span> |
| <span class="source-line-no">533</span><span id="line-533"></span> |
| <span class="source-line-no">534</span><span id="line-534"> public RegionStoreSequenceIds getLastFlushedSequenceId(byte[] encodedRegionName) {</span> |
| <span class="source-line-no">535</span><span id="line-535"> RegionStoreSequenceIds.Builder builder = RegionStoreSequenceIds.newBuilder();</span> |
| <span class="source-line-no">536</span><span id="line-536"> Long seqId = flushedSequenceIdByRegion.get(encodedRegionName);</span> |
| <span class="source-line-no">537</span><span id="line-537"> builder.setLastFlushedSequenceId(seqId != null ? seqId.longValue() : HConstants.NO_SEQNUM);</span> |
| <span class="source-line-no">538</span><span id="line-538"> Map<byte[], Long> storeFlushedSequenceId =</span> |
| <span class="source-line-no">539</span><span id="line-539"> storeFlushedSequenceIdsByRegion.get(encodedRegionName);</span> |
| <span class="source-line-no">540</span><span id="line-540"> if (storeFlushedSequenceId != null) {</span> |
| <span class="source-line-no">541</span><span id="line-541"> for (Map.Entry<byte[], Long> entry : storeFlushedSequenceId.entrySet()) {</span> |
| <span class="source-line-no">542</span><span id="line-542"> builder.addStoreSequenceId(StoreSequenceId.newBuilder()</span> |
| <span class="source-line-no">543</span><span id="line-543"> .setFamilyName(UnsafeByteOperations.unsafeWrap(entry.getKey()))</span> |
| <span class="source-line-no">544</span><span id="line-544"> .setSequenceId(entry.getValue().longValue()).build());</span> |
| <span class="source-line-no">545</span><span id="line-545"> }</span> |
| <span class="source-line-no">546</span><span id="line-546"> }</span> |
| <span class="source-line-no">547</span><span id="line-547"> return builder.build();</span> |
| <span class="source-line-no">548</span><span id="line-548"> }</span> |
| <span class="source-line-no">549</span><span id="line-549"></span> |
| <span class="source-line-no">550</span><span id="line-550"> /** Returns ServerMetrics if serverName is known else null */</span> |
| <span class="source-line-no">551</span><span id="line-551"> public ServerMetrics getLoad(final ServerName serverName) {</span> |
| <span class="source-line-no">552</span><span id="line-552"> return this.onlineServers.get(serverName);</span> |
| <span class="source-line-no">553</span><span id="line-553"> }</span> |
| <span class="source-line-no">554</span><span id="line-554"></span> |
| <span class="source-line-no">555</span><span id="line-555"> /**</span> |
| <span class="source-line-no">556</span><span id="line-556"> * Compute the average load across all region servers. Currently, this uses a very naive</span> |
| <span class="source-line-no">557</span><span id="line-557"> * computation - just uses the number of regions being served, ignoring stats about number of</span> |
| <span class="source-line-no">558</span><span id="line-558"> * requests.</span> |
| <span class="source-line-no">559</span><span id="line-559"> * @return the average load</span> |
| <span class="source-line-no">560</span><span id="line-560"> */</span> |
| <span class="source-line-no">561</span><span id="line-561"> public double getAverageLoad() {</span> |
| <span class="source-line-no">562</span><span id="line-562"> int totalLoad = 0;</span> |
| <span class="source-line-no">563</span><span id="line-563"> int numServers = 0;</span> |
| <span class="source-line-no">564</span><span id="line-564"> for (ServerMetrics sl : this.onlineServers.values()) {</span> |
| <span class="source-line-no">565</span><span id="line-565"> numServers++;</span> |
| <span class="source-line-no">566</span><span id="line-566"> totalLoad += sl.getRegionMetrics().size();</span> |
| <span class="source-line-no">567</span><span id="line-567"> }</span> |
| <span class="source-line-no">568</span><span id="line-568"> return numServers == 0 ? 0 : (double) totalLoad / (double) numServers;</span> |
| <span class="source-line-no">569</span><span id="line-569"> }</span> |
| <span class="source-line-no">570</span><span id="line-570"></span> |
| <span class="source-line-no">571</span><span id="line-571"> /** Returns the count of active regionservers */</span> |
| <span class="source-line-no">572</span><span id="line-572"> public int countOfRegionServers() {</span> |
| <span class="source-line-no">573</span><span id="line-573"> // Presumes onlineServers is a concurrent map</span> |
| <span class="source-line-no">574</span><span id="line-574"> return this.onlineServers.size();</span> |
| <span class="source-line-no">575</span><span id="line-575"> }</span> |
| <span class="source-line-no">576</span><span id="line-576"></span> |
| <span class="source-line-no">577</span><span id="line-577"> /** Returns Read-only map of servers to serverinfo */</span> |
| <span class="source-line-no">578</span><span id="line-578"> public Map<ServerName, ServerMetrics> getOnlineServers() {</span> |
| <span class="source-line-no">579</span><span id="line-579"> // Presumption is that iterating the returned Map is OK.</span> |
| <span class="source-line-no">580</span><span id="line-580"> synchronized (this.onlineServers) {</span> |
| <span class="source-line-no">581</span><span id="line-581"> return Collections.unmodifiableMap(this.onlineServers);</span> |
| <span class="source-line-no">582</span><span id="line-582"> }</span> |
| <span class="source-line-no">583</span><span id="line-583"> }</span> |
| <span class="source-line-no">584</span><span id="line-584"></span> |
| <span class="source-line-no">585</span><span id="line-585"> public DeadServer getDeadServers() {</span> |
| <span class="source-line-no">586</span><span id="line-586"> return this.deadservers;</span> |
| <span class="source-line-no">587</span><span id="line-587"> }</span> |
| <span class="source-line-no">588</span><span id="line-588"></span> |
| <span class="source-line-no">589</span><span id="line-589"> /**</span> |
| <span class="source-line-no">590</span><span id="line-590"> * Checks if any dead servers are currently in progress.</span> |
| <span class="source-line-no">591</span><span id="line-591"> * @return true if any RS are being processed as dead, false if not</span> |
| <span class="source-line-no">592</span><span id="line-592"> */</span> |
| <span class="source-line-no">593</span><span id="line-593"> public boolean areDeadServersInProgress() throws IOException {</span> |
| <span class="source-line-no">594</span><span id="line-594"> return master.getProcedures().stream()</span> |
| <span class="source-line-no">595</span><span id="line-595"> .anyMatch(p -> !p.isFinished() && p instanceof ServerCrashProcedure);</span> |
| <span class="source-line-no">596</span><span id="line-596"> }</span> |
| <span class="source-line-no">597</span><span id="line-597"></span> |
| <span class="source-line-no">598</span><span id="line-598"> void letRegionServersShutdown() {</span> |
| <span class="source-line-no">599</span><span id="line-599"> long previousLogTime = 0;</span> |
| <span class="source-line-no">600</span><span id="line-600"> ServerName sn = master.getServerName();</span> |
| <span class="source-line-no">601</span><span id="line-601"> ZKWatcher zkw = master.getZooKeeper();</span> |
| <span class="source-line-no">602</span><span id="line-602"> int onlineServersCt;</span> |
| <span class="source-line-no">603</span><span id="line-603"> while ((onlineServersCt = onlineServers.size()) > 0) {</span> |
| <span class="source-line-no">604</span><span id="line-604"> if (EnvironmentEdgeManager.currentTime() > (previousLogTime + 1000)) {</span> |
| <span class="source-line-no">605</span><span id="line-605"> Set<ServerName> remainingServers = onlineServers.keySet();</span> |
| <span class="source-line-no">606</span><span id="line-606"> synchronized (onlineServers) {</span> |
| <span class="source-line-no">607</span><span id="line-607"> if (remainingServers.size() == 1 && remainingServers.contains(sn)) {</span> |
| <span class="source-line-no">608</span><span id="line-608"> // Master will delete itself later.</span> |
| <span class="source-line-no">609</span><span id="line-609"> return;</span> |
| <span class="source-line-no">610</span><span id="line-610"> }</span> |
| <span class="source-line-no">611</span><span id="line-611"> }</span> |
| <span class="source-line-no">612</span><span id="line-612"> StringBuilder sb = new StringBuilder();</span> |
| <span class="source-line-no">613</span><span id="line-613"> // It's ok here to not sync on onlineServers - merely logging</span> |
| <span class="source-line-no">614</span><span id="line-614"> for (ServerName key : remainingServers) {</span> |
| <span class="source-line-no">615</span><span id="line-615"> if (sb.length() > 0) {</span> |
| <span class="source-line-no">616</span><span id="line-616"> sb.append(", ");</span> |
| <span class="source-line-no">617</span><span id="line-617"> }</span> |
| <span class="source-line-no">618</span><span id="line-618"> sb.append(key);</span> |
| <span class="source-line-no">619</span><span id="line-619"> }</span> |
| <span class="source-line-no">620</span><span id="line-620"> LOG.info("Waiting on regionserver(s) " + sb.toString());</span> |
| <span class="source-line-no">621</span><span id="line-621"> previousLogTime = EnvironmentEdgeManager.currentTime();</span> |
| <span class="source-line-no">622</span><span id="line-622"> }</span> |
| <span class="source-line-no">623</span><span id="line-623"></span> |
| <span class="source-line-no">624</span><span id="line-624"> try {</span> |
| <span class="source-line-no">625</span><span id="line-625"> List<String> servers = getRegionServersInZK(zkw);</span> |
| <span class="source-line-no">626</span><span id="line-626"> if (</span> |
| <span class="source-line-no">627</span><span id="line-627"> servers == null || servers.isEmpty()</span> |
| <span class="source-line-no">628</span><span id="line-628"> || (servers.size() == 1 && servers.contains(sn.toString()))</span> |
| <span class="source-line-no">629</span><span id="line-629"> ) {</span> |
| <span class="source-line-no">630</span><span id="line-630"> LOG.info("ZK shows there is only the master self online, exiting now");</span> |
| <span class="source-line-no">631</span><span id="line-631"> // Master could have lost some ZK events, no need to wait more.</span> |
| <span class="source-line-no">632</span><span id="line-632"> break;</span> |
| <span class="source-line-no">633</span><span id="line-633"> }</span> |
| <span class="source-line-no">634</span><span id="line-634"> } catch (KeeperException ke) {</span> |
| <span class="source-line-no">635</span><span id="line-635"> LOG.warn("Failed to list regionservers", ke);</span> |
| <span class="source-line-no">636</span><span id="line-636"> // ZK is malfunctioning, don't hang here</span> |
| <span class="source-line-no">637</span><span id="line-637"> break;</span> |
| <span class="source-line-no">638</span><span id="line-638"> }</span> |
| <span class="source-line-no">639</span><span id="line-639"> synchronized (onlineServers) {</span> |
| <span class="source-line-no">640</span><span id="line-640"> try {</span> |
| <span class="source-line-no">641</span><span id="line-641"> if (onlineServersCt == onlineServers.size()) onlineServers.wait(100);</span> |
| <span class="source-line-no">642</span><span id="line-642"> } catch (InterruptedException ignored) {</span> |
| <span class="source-line-no">643</span><span id="line-643"> // continue</span> |
| <span class="source-line-no">644</span><span id="line-644"> }</span> |
| <span class="source-line-no">645</span><span id="line-645"> }</span> |
| <span class="source-line-no">646</span><span id="line-646"> }</span> |
| <span class="source-line-no">647</span><span id="line-647"> }</span> |
| <span class="source-line-no">648</span><span id="line-648"></span> |
| <span class="source-line-no">649</span><span id="line-649"> private List<String> getRegionServersInZK(final ZKWatcher zkw) throws KeeperException {</span> |
| <span class="source-line-no">650</span><span id="line-650"> return ZKUtil.listChildrenNoWatch(zkw, zkw.getZNodePaths().rsZNode);</span> |
| <span class="source-line-no">651</span><span id="line-651"> }</span> |
| <span class="source-line-no">652</span><span id="line-652"></span> |
| <span class="source-line-no">653</span><span id="line-653"> /**</span> |
| <span class="source-line-no">654</span><span id="line-654"> * Expire the passed server. Add it to list of dead servers and queue a shutdown processing.</span> |
| <span class="source-line-no">655</span><span id="line-655"> * @return pid if we queued a ServerCrashProcedure else {@link Procedure#NO_PROC_ID} if we did not</span> |
| <span class="source-line-no">656</span><span id="line-656"> * (could happen for many reasons including the fact that its this server that is going</span> |
| <span class="source-line-no">657</span><span id="line-657"> * down or we already have queued an SCP for this server or SCP processing is currently</span> |
| <span class="source-line-no">658</span><span id="line-658"> * disabled because we are in startup phase).</span> |
| <span class="source-line-no">659</span><span id="line-659"> */</span> |
| <span class="source-line-no">660</span><span id="line-660"> // Redo test so we can make this protected.</span> |
| <span class="source-line-no">661</span><span id="line-661"> public synchronized long expireServer(final ServerName serverName) {</span> |
| <span class="source-line-no">662</span><span id="line-662"> return expireServer(serverName, false);</span> |
| <span class="source-line-no">663</span><span id="line-663"></span> |
| <span class="source-line-no">664</span><span id="line-664"> }</span> |
| <span class="source-line-no">665</span><span id="line-665"></span> |
| <span class="source-line-no">666</span><span id="line-666"> synchronized long expireServer(final ServerName serverName, boolean force) {</span> |
| <span class="source-line-no">667</span><span id="line-667"> // THIS server is going down... can't handle our own expiration.</span> |
| <span class="source-line-no">668</span><span id="line-668"> if (serverName.equals(master.getServerName())) {</span> |
| <span class="source-line-no">669</span><span id="line-669"> if (!(master.isAborted() || master.isStopped())) {</span> |
| <span class="source-line-no">670</span><span id="line-670"> master.stop("We lost our znode?");</span> |
| <span class="source-line-no">671</span><span id="line-671"> }</span> |
| <span class="source-line-no">672</span><span id="line-672"> return Procedure.NO_PROC_ID;</span> |
| <span class="source-line-no">673</span><span id="line-673"> }</span> |
| <span class="source-line-no">674</span><span id="line-674"> if (this.deadservers.isDeadServer(serverName)) {</span> |
| <span class="source-line-no">675</span><span id="line-675"> LOG.warn("Expiration called on {} but already in DeadServer", serverName);</span> |
| <span class="source-line-no">676</span><span id="line-676"> return Procedure.NO_PROC_ID;</span> |
| <span class="source-line-no">677</span><span id="line-677"> }</span> |
| <span class="source-line-no">678</span><span id="line-678"> moveFromOnlineToDeadServers(serverName);</span> |
| <span class="source-line-no">679</span><span id="line-679"></span> |
| <span class="source-line-no">680</span><span id="line-680"> // If server is in draining mode, remove corresponding znode</span> |
| <span class="source-line-no">681</span><span id="line-681"> // In some tests, the mocked HM may not have ZK Instance, hence null check</span> |
| <span class="source-line-no">682</span><span id="line-682"> if (master.getZooKeeper() != null) {</span> |
| <span class="source-line-no">683</span><span id="line-683"> String drainingZnode = ZNodePaths</span> |
| <span class="source-line-no">684</span><span id="line-684"> .joinZNode(master.getZooKeeper().getZNodePaths().drainingZNode, serverName.getServerName());</span> |
| <span class="source-line-no">685</span><span id="line-685"> try {</span> |
| <span class="source-line-no">686</span><span id="line-686"> ZKUtil.deleteNodeFailSilent(master.getZooKeeper(), drainingZnode);</span> |
| <span class="source-line-no">687</span><span id="line-687"> } catch (KeeperException e) {</span> |
| <span class="source-line-no">688</span><span id="line-688"> LOG.warn(</span> |
| <span class="source-line-no">689</span><span id="line-689"> "Error deleting the draining znode for stopping server " + serverName.getServerName(), e);</span> |
| <span class="source-line-no">690</span><span id="line-690"> }</span> |
| <span class="source-line-no">691</span><span id="line-691"> }</span> |
| <span class="source-line-no">692</span><span id="line-692"></span> |
| <span class="source-line-no">693</span><span id="line-693"> // If cluster is going down, yes, servers are going to be expiring; don't</span> |
| <span class="source-line-no">694</span><span id="line-694"> // process as a dead server</span> |
| <span class="source-line-no">695</span><span id="line-695"> if (isClusterShutdown()) {</span> |
| <span class="source-line-no">696</span><span id="line-696"> LOG.info("Cluster shutdown set; " + serverName + " expired; onlineServers="</span> |
| <span class="source-line-no">697</span><span id="line-697"> + this.onlineServers.size());</span> |
| <span class="source-line-no">698</span><span id="line-698"> if (this.onlineServers.isEmpty()) {</span> |
| <span class="source-line-no">699</span><span id="line-699"> master.stop("Cluster shutdown set; onlineServer=0");</span> |
| <span class="source-line-no">700</span><span id="line-700"> }</span> |
| <span class="source-line-no">701</span><span id="line-701"> return Procedure.NO_PROC_ID;</span> |
| <span class="source-line-no">702</span><span id="line-702"> }</span> |
| <span class="source-line-no">703</span><span id="line-703"> LOG.info("Processing expiration of " + serverName + " on " + this.master.getServerName());</span> |
| <span class="source-line-no">704</span><span id="line-704"> long pid = master.getAssignmentManager().submitServerCrash(serverName, true, force);</span> |
| <span class="source-line-no">705</span><span id="line-705"> if (pid == Procedure.NO_PROC_ID) {</span> |
| <span class="source-line-no">706</span><span id="line-706"> // skip later processing as we failed to submit SCP</span> |
| <span class="source-line-no">707</span><span id="line-707"> return Procedure.NO_PROC_ID;</span> |
| <span class="source-line-no">708</span><span id="line-708"> }</span> |
| <span class="source-line-no">709</span><span id="line-709"> storage.expired(serverName);</span> |
| <span class="source-line-no">710</span><span id="line-710"> // Tell our listeners that a server was removed</span> |
| <span class="source-line-no">711</span><span id="line-711"> if (!this.listeners.isEmpty()) {</span> |
| <span class="source-line-no">712</span><span id="line-712"> this.listeners.stream().forEach(l -> l.serverRemoved(serverName));</span> |
| <span class="source-line-no">713</span><span id="line-713"> }</span> |
| <span class="source-line-no">714</span><span id="line-714"> // trigger a persist of flushedSeqId</span> |
| <span class="source-line-no">715</span><span id="line-715"> if (flushedSeqIdFlusher != null) {</span> |
| <span class="source-line-no">716</span><span id="line-716"> flushedSeqIdFlusher.triggerNow();</span> |
| <span class="source-line-no">717</span><span id="line-717"> }</span> |
| <span class="source-line-no">718</span><span id="line-718"> return pid;</span> |
| <span class="source-line-no">719</span><span id="line-719"> }</span> |
| <span class="source-line-no">720</span><span id="line-720"></span> |
| <span class="source-line-no">721</span><span id="line-721"> /**</span> |
| <span class="source-line-no">722</span><span id="line-722"> * Called when server has expired.</span> |
| <span class="source-line-no">723</span><span id="line-723"> */</span> |
| <span class="source-line-no">724</span><span id="line-724"> // Locking in this class needs cleanup.</span> |
| <span class="source-line-no">725</span><span id="line-725"> public synchronized void moveFromOnlineToDeadServers(final ServerName sn) {</span> |
| <span class="source-line-no">726</span><span id="line-726"> synchronized (this.onlineServers) {</span> |
| <span class="source-line-no">727</span><span id="line-727"> boolean online = this.onlineServers.containsKey(sn);</span> |
| <span class="source-line-no">728</span><span id="line-728"> if (online) {</span> |
| <span class="source-line-no">729</span><span id="line-729"> // Remove the server from the known servers lists and update load info BUT</span> |
| <span class="source-line-no">730</span><span id="line-730"> // add to deadservers first; do this so it'll show in dead servers list if</span> |
| <span class="source-line-no">731</span><span id="line-731"> // not in online servers list.</span> |
| <span class="source-line-no">732</span><span id="line-732"> this.deadservers.putIfAbsent(sn);</span> |
| <span class="source-line-no">733</span><span id="line-733"> this.onlineServers.remove(sn);</span> |
| <span class="source-line-no">734</span><span id="line-734"> onlineServers.notifyAll();</span> |
| <span class="source-line-no">735</span><span id="line-735"> } else {</span> |
| <span class="source-line-no">736</span><span id="line-736"> // If not online, that is odd but may happen if 'Unknown Servers' -- where meta</span> |
| <span class="source-line-no">737</span><span id="line-737"> // has references to servers not online nor in dead servers list. If</span> |
| <span class="source-line-no">738</span><span id="line-738"> // 'Unknown Server', don't add to DeadServers else will be there for ever.</span> |
| <span class="source-line-no">739</span><span id="line-739"> LOG.trace("Expiration of {} but server not online", sn);</span> |
| <span class="source-line-no">740</span><span id="line-740"> }</span> |
| <span class="source-line-no">741</span><span id="line-741"> }</span> |
| <span class="source-line-no">742</span><span id="line-742"> }</span> |
| <span class="source-line-no">743</span><span id="line-743"></span> |
| <span class="source-line-no">744</span><span id="line-744"> /*</span> |
| <span class="source-line-no">745</span><span id="line-745"> * Remove the server from the drain list.</span> |
| <span class="source-line-no">746</span><span id="line-746"> */</span> |
| <span class="source-line-no">747</span><span id="line-747"> public synchronized boolean removeServerFromDrainList(final ServerName sn) {</span> |
| <span class="source-line-no">748</span><span id="line-748"> LOG.info("Removing server {} from the draining list.", sn);</span> |
| <span class="source-line-no">749</span><span id="line-749"></span> |
| <span class="source-line-no">750</span><span id="line-750"> // Remove the server from the draining servers lists.</span> |
| <span class="source-line-no">751</span><span id="line-751"> return this.drainingServers.remove(sn);</span> |
| <span class="source-line-no">752</span><span id="line-752"> }</span> |
| <span class="source-line-no">753</span><span id="line-753"></span> |
| <span class="source-line-no">754</span><span id="line-754"> /**</span> |
| <span class="source-line-no">755</span><span id="line-755"> * Add the server to the drain list.</span> |
| <span class="source-line-no">756</span><span id="line-756"> * @return True if the server is added or the server is already on the drain list.</span> |
| <span class="source-line-no">757</span><span id="line-757"> */</span> |
| <span class="source-line-no">758</span><span id="line-758"> public synchronized boolean addServerToDrainList(final ServerName sn) {</span> |
| <span class="source-line-no">759</span><span id="line-759"> // If master is not rejecting decommissioned hosts, warn if the server (sn) is not online.</span> |
| <span class="source-line-no">760</span><span id="line-760"> // However, we want to add servers even if they're not online if the master is configured</span> |
| <span class="source-line-no">761</span><span id="line-761"> // to reject decommissioned hosts</span> |
| <span class="source-line-no">762</span><span id="line-762"> if (!rejectDecommissionedHostsConfig && !this.isServerOnline(sn)) {</span> |
| <span class="source-line-no">763</span><span id="line-763"> LOG.warn("Server {} is not currently online. Ignoring request to add it to draining list.",</span> |
| <span class="source-line-no">764</span><span id="line-764"> sn);</span> |
| <span class="source-line-no">765</span><span id="line-765"> return false;</span> |
| <span class="source-line-no">766</span><span id="line-766"> }</span> |
| <span class="source-line-no">767</span><span id="line-767"></span> |
| <span class="source-line-no">768</span><span id="line-768"> // Add the server to the draining servers lists, if it's not already in it.</span> |
| <span class="source-line-no">769</span><span id="line-769"> if (this.drainingServers.contains(sn)) {</span> |
| <span class="source-line-no">770</span><span id="line-770"> LOG.warn(</span> |
| <span class="source-line-no">771</span><span id="line-771"> "Server {} is already in the draining server list. Ignoring request to add it again.", sn);</span> |
| <span class="source-line-no">772</span><span id="line-772"> return true;</span> |
| <span class="source-line-no">773</span><span id="line-773"> }</span> |
| <span class="source-line-no">774</span><span id="line-774"></span> |
| <span class="source-line-no">775</span><span id="line-775"> LOG.info("Server {} added to draining server list.", sn);</span> |
| <span class="source-line-no">776</span><span id="line-776"> return this.drainingServers.add(sn);</span> |
| <span class="source-line-no">777</span><span id="line-777"> }</span> |
| <span class="source-line-no">778</span><span id="line-778"></span> |
| <span class="source-line-no">779</span><span id="line-779"> /**</span> |
| <span class="source-line-no">780</span><span id="line-780"> * Contacts a region server and waits up to timeout ms to close the region. This bypasses the</span> |
| <span class="source-line-no">781</span><span id="line-781"> * active hmaster. Pass -1 as timeout if you do not want to wait on result.</span> |
| <span class="source-line-no">782</span><span id="line-782"> */</span> |
| <span class="source-line-no">783</span><span id="line-783"> public static void closeRegionSilentlyAndWait(AsyncClusterConnection connection,</span> |
| <span class="source-line-no">784</span><span id="line-784"> ServerName server, RegionInfo region, long timeout) throws IOException, InterruptedException {</span> |
| <span class="source-line-no">785</span><span id="line-785"> AsyncRegionServerAdmin admin = connection.getRegionServerAdmin(server);</span> |
| <span class="source-line-no">786</span><span id="line-786"> try {</span> |
| <span class="source-line-no">787</span><span id="line-787"> FutureUtils.get(</span> |
| <span class="source-line-no">788</span><span id="line-788"> admin.closeRegion(ProtobufUtil.buildCloseRegionRequest(server, region.getRegionName())));</span> |
| <span class="source-line-no">789</span><span id="line-789"> } catch (IOException e) {</span> |
| <span class="source-line-no">790</span><span id="line-790"> LOG.warn("Exception when closing region: " + region.getRegionNameAsString(), e);</span> |
| <span class="source-line-no">791</span><span id="line-791"> }</span> |
| <span class="source-line-no">792</span><span id="line-792"> if (timeout < 0) {</span> |
| <span class="source-line-no">793</span><span id="line-793"> return;</span> |
| <span class="source-line-no">794</span><span id="line-794"> }</span> |
| <span class="source-line-no">795</span><span id="line-795"> long expiration = timeout + EnvironmentEdgeManager.currentTime();</span> |
| <span class="source-line-no">796</span><span id="line-796"> while (EnvironmentEdgeManager.currentTime() < expiration) {</span> |
| <span class="source-line-no">797</span><span id="line-797"> try {</span> |
| <span class="source-line-no">798</span><span id="line-798"> RegionInfo rsRegion = ProtobufUtil.toRegionInfo(FutureUtils</span> |
| <span class="source-line-no">799</span><span id="line-799"> .get(</span> |
| <span class="source-line-no">800</span><span id="line-800"> admin.getRegionInfo(RequestConverter.buildGetRegionInfoRequest(region.getRegionName())))</span> |
| <span class="source-line-no">801</span><span id="line-801"> .getRegionInfo());</span> |
| <span class="source-line-no">802</span><span id="line-802"> if (rsRegion == null) {</span> |
| <span class="source-line-no">803</span><span id="line-803"> return;</span> |
| <span class="source-line-no">804</span><span id="line-804"> }</span> |
| <span class="source-line-no">805</span><span id="line-805"> } catch (IOException ioe) {</span> |
| <span class="source-line-no">806</span><span id="line-806"> if (</span> |
| <span class="source-line-no">807</span><span id="line-807"> ioe instanceof NotServingRegionException</span> |
| <span class="source-line-no">808</span><span id="line-808"> || (ioe instanceof RemoteWithExtrasException && ((RemoteWithExtrasException) ioe)</span> |
| <span class="source-line-no">809</span><span id="line-809"> .unwrapRemoteException() instanceof NotServingRegionException)</span> |
| <span class="source-line-no">810</span><span id="line-810"> ) {</span> |
| <span class="source-line-no">811</span><span id="line-811"> // no need to retry again</span> |
| <span class="source-line-no">812</span><span id="line-812"> return;</span> |
| <span class="source-line-no">813</span><span id="line-813"> }</span> |
| <span class="source-line-no">814</span><span id="line-814"> LOG.warn("Exception when retrieving regioninfo from: " + region.getRegionNameAsString(),</span> |
| <span class="source-line-no">815</span><span id="line-815"> ioe);</span> |
| <span class="source-line-no">816</span><span id="line-816"> }</span> |
| <span class="source-line-no">817</span><span id="line-817"> Thread.sleep(1000);</span> |
| <span class="source-line-no">818</span><span id="line-818"> }</span> |
| <span class="source-line-no">819</span><span id="line-819"> throw new IOException("Region " + region + " failed to close within" + " timeout " + timeout);</span> |
| <span class="source-line-no">820</span><span id="line-820"> }</span> |
| <span class="source-line-no">821</span><span id="line-821"></span> |
| <span class="source-line-no">822</span><span id="line-822"> /**</span> |
| <span class="source-line-no">823</span><span id="line-823"> * Calculate min necessary to start. This is not an absolute. It is just a friction that will</span> |
| <span class="source-line-no">824</span><span id="line-824"> * cause us hang around a bit longer waiting on RegionServers to check-in.</span> |
| <span class="source-line-no">825</span><span id="line-825"> */</span> |
| <span class="source-line-no">826</span><span id="line-826"> private int getMinToStart() {</span> |
| <span class="source-line-no">827</span><span id="line-827"> if (master.isInMaintenanceMode()) {</span> |
| <span class="source-line-no">828</span><span id="line-828"> // If in maintenance mode, then in process region server hosting meta will be the only server</span> |
| <span class="source-line-no">829</span><span id="line-829"> // available</span> |
| <span class="source-line-no">830</span><span id="line-830"> return 1;</span> |
| <span class="source-line-no">831</span><span id="line-831"> }</span> |
| <span class="source-line-no">832</span><span id="line-832"></span> |
| <span class="source-line-no">833</span><span id="line-833"> int minimumRequired = 1;</span> |
| <span class="source-line-no">834</span><span id="line-834"> int minToStart = this.master.getConfiguration().getInt(WAIT_ON_REGIONSERVERS_MINTOSTART, -1);</span> |
| <span class="source-line-no">835</span><span id="line-835"> // Ensure we are never less than minimumRequired else stuff won't work.</span> |
| <span class="source-line-no">836</span><span id="line-836"> return Math.max(minToStart, minimumRequired);</span> |
| <span class="source-line-no">837</span><span id="line-837"> }</span> |
| <span class="source-line-no">838</span><span id="line-838"></span> |
| <span class="source-line-no">839</span><span id="line-839"> /**</span> |
| <span class="source-line-no">840</span><span id="line-840"> * Wait for the region servers to report in. We will wait until one of this condition is met: -</span> |
| <span class="source-line-no">841</span><span id="line-841"> * the master is stopped - the 'hbase.master.wait.on.regionservers.maxtostart' number of region</span> |
| <span class="source-line-no">842</span><span id="line-842"> * servers is reached - the 'hbase.master.wait.on.regionservers.mintostart' is reached AND there</span> |
| <span class="source-line-no">843</span><span id="line-843"> * have been no new region server in for 'hbase.master.wait.on.regionservers.interval' time AND</span> |
| <span class="source-line-no">844</span><span id="line-844"> * the 'hbase.master.wait.on.regionservers.timeout' is reached</span> |
| <span class="source-line-no">845</span><span id="line-845"> */</span> |
| <span class="source-line-no">846</span><span id="line-846"> public void waitForRegionServers(MonitoredTask status) throws InterruptedException {</span> |
| <span class="source-line-no">847</span><span id="line-847"> final long interval =</span> |
| <span class="source-line-no">848</span><span id="line-848"> this.master.getConfiguration().getLong(WAIT_ON_REGIONSERVERS_INTERVAL, 1500);</span> |
| <span class="source-line-no">849</span><span id="line-849"> final long timeout =</span> |
| <span class="source-line-no">850</span><span id="line-850"> this.master.getConfiguration().getLong(WAIT_ON_REGIONSERVERS_TIMEOUT, 4500);</span> |
| <span class="source-line-no">851</span><span id="line-851"> // Min is not an absolute; just a friction making us wait longer on server checkin.</span> |
| <span class="source-line-no">852</span><span id="line-852"> int minToStart = getMinToStart();</span> |
| <span class="source-line-no">853</span><span id="line-853"> int maxToStart =</span> |
| <span class="source-line-no">854</span><span id="line-854"> this.master.getConfiguration().getInt(WAIT_ON_REGIONSERVERS_MAXTOSTART, Integer.MAX_VALUE);</span> |
| <span class="source-line-no">855</span><span id="line-855"> if (maxToStart < minToStart) {</span> |
| <span class="source-line-no">856</span><span id="line-856"> LOG.warn(String.format("The value of '%s' (%d) is set less than '%s' (%d), ignoring.",</span> |
| <span class="source-line-no">857</span><span id="line-857"> WAIT_ON_REGIONSERVERS_MAXTOSTART, maxToStart, WAIT_ON_REGIONSERVERS_MINTOSTART,</span> |
| <span class="source-line-no">858</span><span id="line-858"> minToStart));</span> |
| <span class="source-line-no">859</span><span id="line-859"> maxToStart = Integer.MAX_VALUE;</span> |
| <span class="source-line-no">860</span><span id="line-860"> }</span> |
| <span class="source-line-no">861</span><span id="line-861"></span> |
| <span class="source-line-no">862</span><span id="line-862"> long now = EnvironmentEdgeManager.currentTime();</span> |
| <span class="source-line-no">863</span><span id="line-863"> final long startTime = now;</span> |
| <span class="source-line-no">864</span><span id="line-864"> long slept = 0;</span> |
| <span class="source-line-no">865</span><span id="line-865"> long lastLogTime = 0;</span> |
| <span class="source-line-no">866</span><span id="line-866"> long lastCountChange = startTime;</span> |
| <span class="source-line-no">867</span><span id="line-867"> int count = countOfRegionServers();</span> |
| <span class="source-line-no">868</span><span id="line-868"> int oldCount = 0;</span> |
| <span class="source-line-no">869</span><span id="line-869"> // This while test is a little hard to read. We try to comment it in below but in essence:</span> |
| <span class="source-line-no">870</span><span id="line-870"> // Wait if Master is not stopped and the number of regionservers that have checked-in is</span> |
| <span class="source-line-no">871</span><span id="line-871"> // less than the maxToStart. Both of these conditions will be true near universally.</span> |
| <span class="source-line-no">872</span><span id="line-872"> // Next, we will keep cycling if ANY of the following three conditions are true:</span> |
| <span class="source-line-no">873</span><span id="line-873"> // 1. The time since a regionserver registered is < interval (means servers are actively</span> |
| <span class="source-line-no">874</span><span id="line-874"> // checking in).</span> |
| <span class="source-line-no">875</span><span id="line-875"> // 2. We are under the total timeout.</span> |
| <span class="source-line-no">876</span><span id="line-876"> // 3. The count of servers is < minimum.</span> |
| <span class="source-line-no">877</span><span id="line-877"> for (ServerListener listener : this.listeners) {</span> |
| <span class="source-line-no">878</span><span id="line-878"> listener.waiting();</span> |
| <span class="source-line-no">879</span><span id="line-879"> }</span> |
| <span class="source-line-no">880</span><span id="line-880"> while (</span> |
| <span class="source-line-no">881</span><span id="line-881"> !this.master.isStopped() && !isClusterShutdown() && count < maxToStart</span> |
| <span class="source-line-no">882</span><span id="line-882"> && ((lastCountChange + interval) > now || timeout > slept || count < minToStart)</span> |
| <span class="source-line-no">883</span><span id="line-883"> ) {</span> |
| <span class="source-line-no">884</span><span id="line-884"> // Log some info at every interval time or if there is a change</span> |
| <span class="source-line-no">885</span><span id="line-885"> if (oldCount != count || lastLogTime + interval < now) {</span> |
| <span class="source-line-no">886</span><span id="line-886"> lastLogTime = now;</span> |
| <span class="source-line-no">887</span><span id="line-887"> String msg =</span> |
| <span class="source-line-no">888</span><span id="line-888"> "Waiting on regionserver count=" + count + "; waited=" + slept + "ms, expecting min="</span> |
| <span class="source-line-no">889</span><span id="line-889"> + minToStart + " server(s), max=" + getStrForMax(maxToStart) + " server(s), "</span> |
| <span class="source-line-no">890</span><span id="line-890"> + "timeout=" + timeout + "ms, lastChange=" + (now - lastCountChange) + "ms";</span> |
| <span class="source-line-no">891</span><span id="line-891"> LOG.info(msg);</span> |
| <span class="source-line-no">892</span><span id="line-892"> status.setStatus(msg);</span> |
| <span class="source-line-no">893</span><span id="line-893"> }</span> |
| <span class="source-line-no">894</span><span id="line-894"></span> |
| <span class="source-line-no">895</span><span id="line-895"> // We sleep for some time</span> |
| <span class="source-line-no">896</span><span id="line-896"> final long sleepTime = 50;</span> |
| <span class="source-line-no">897</span><span id="line-897"> Thread.sleep(sleepTime);</span> |
| <span class="source-line-no">898</span><span id="line-898"> now = EnvironmentEdgeManager.currentTime();</span> |
| <span class="source-line-no">899</span><span id="line-899"> slept = now - startTime;</span> |
| <span class="source-line-no">900</span><span id="line-900"></span> |
| <span class="source-line-no">901</span><span id="line-901"> oldCount = count;</span> |
| <span class="source-line-no">902</span><span id="line-902"> count = countOfRegionServers();</span> |
| <span class="source-line-no">903</span><span id="line-903"> if (count != oldCount) {</span> |
| <span class="source-line-no">904</span><span id="line-904"> lastCountChange = now;</span> |
| <span class="source-line-no">905</span><span id="line-905"> }</span> |
| <span class="source-line-no">906</span><span id="line-906"> }</span> |
| <span class="source-line-no">907</span><span id="line-907"> // Did we exit the loop because cluster is going down?</span> |
| <span class="source-line-no">908</span><span id="line-908"> if (isClusterShutdown()) {</span> |
| <span class="source-line-no">909</span><span id="line-909"> this.master.stop("Cluster shutdown");</span> |
| <span class="source-line-no">910</span><span id="line-910"> }</span> |
| <span class="source-line-no">911</span><span id="line-911"> LOG.info("Finished waiting on RegionServer count=" + count + "; waited=" + slept + "ms,"</span> |
| <span class="source-line-no">912</span><span id="line-912"> + " expected min=" + minToStart + " server(s), max=" + getStrForMax(maxToStart)</span> |
| <span class="source-line-no">913</span><span id="line-913"> + " server(s)," + " master is " + (this.master.isStopped() ? "stopped." : "running"));</span> |
| <span class="source-line-no">914</span><span id="line-914"> }</span> |
| <span class="source-line-no">915</span><span id="line-915"></span> |
| <span class="source-line-no">916</span><span id="line-916"> private String getStrForMax(final int max) {</span> |
| <span class="source-line-no">917</span><span id="line-917"> return max == Integer.MAX_VALUE ? "NO_LIMIT" : Integer.toString(max);</span> |
| <span class="source-line-no">918</span><span id="line-918"> }</span> |
| <span class="source-line-no">919</span><span id="line-919"></span> |
| <span class="source-line-no">920</span><span id="line-920"> /** Returns a copy of the internal list of online servers. */</span> |
| <span class="source-line-no">921</span><span id="line-921"> public List<ServerName> getOnlineServersList() {</span> |
| <span class="source-line-no">922</span><span id="line-922"> // TODO: optimize the load balancer call so we don't need to make a new list</span> |
| <span class="source-line-no">923</span><span id="line-923"> // TODO: FIX. THIS IS POPULAR CALL.</span> |
| <span class="source-line-no">924</span><span id="line-924"> return new ArrayList<>(this.onlineServers.keySet());</span> |
| <span class="source-line-no">925</span><span id="line-925"> }</span> |
| <span class="source-line-no">926</span><span id="line-926"></span> |
| <span class="source-line-no">927</span><span id="line-927"> /**</span> |
| <span class="source-line-no">928</span><span id="line-928"> * @param keys The target server names</span> |
| <span class="source-line-no">929</span><span id="line-929"> * @param idleServerPredicator Evaluates the server on the given load</span> |
| <span class="source-line-no">930</span><span id="line-930"> * @return A copy of the internal list of online servers matched by the predicator</span> |
| <span class="source-line-no">931</span><span id="line-931"> */</span> |
| <span class="source-line-no">932</span><span id="line-932"> public List<ServerName> getOnlineServersListWithPredicator(List<ServerName> keys,</span> |
| <span class="source-line-no">933</span><span id="line-933"> Predicate<ServerMetrics> idleServerPredicator) {</span> |
| <span class="source-line-no">934</span><span id="line-934"> List<ServerName> names = new ArrayList<>();</span> |
| <span class="source-line-no">935</span><span id="line-935"> if (keys != null && idleServerPredicator != null) {</span> |
| <span class="source-line-no">936</span><span id="line-936"> keys.forEach(name -> {</span> |
| <span class="source-line-no">937</span><span id="line-937"> ServerMetrics load = onlineServers.get(name);</span> |
| <span class="source-line-no">938</span><span id="line-938"> if (load != null) {</span> |
| <span class="source-line-no">939</span><span id="line-939"> if (idleServerPredicator.test(load)) {</span> |
| <span class="source-line-no">940</span><span id="line-940"> names.add(name);</span> |
| <span class="source-line-no">941</span><span id="line-941"> }</span> |
| <span class="source-line-no">942</span><span id="line-942"> }</span> |
| <span class="source-line-no">943</span><span id="line-943"> });</span> |
| <span class="source-line-no">944</span><span id="line-944"> }</span> |
| <span class="source-line-no">945</span><span id="line-945"> return names;</span> |
| <span class="source-line-no">946</span><span id="line-946"> }</span> |
| <span class="source-line-no">947</span><span id="line-947"></span> |
| <span class="source-line-no">948</span><span id="line-948"> /** Returns a copy of the internal list of draining servers. */</span> |
| <span class="source-line-no">949</span><span id="line-949"> public List<ServerName> getDrainingServersList() {</span> |
| <span class="source-line-no">950</span><span id="line-950"> return new ArrayList<>(this.drainingServers);</span> |
| <span class="source-line-no">951</span><span id="line-951"> }</span> |
| <span class="source-line-no">952</span><span id="line-952"></span> |
| <span class="source-line-no">953</span><span id="line-953"> public boolean isServerOnline(ServerName serverName) {</span> |
| <span class="source-line-no">954</span><span id="line-954"> return serverName != null && onlineServers.containsKey(serverName);</span> |
| <span class="source-line-no">955</span><span id="line-955"> }</span> |
| <span class="source-line-no">956</span><span id="line-956"></span> |
| <span class="source-line-no">957</span><span id="line-957"> public enum ServerLiveState {</span> |
| <span class="source-line-no">958</span><span id="line-958"> LIVE,</span> |
| <span class="source-line-no">959</span><span id="line-959"> DEAD,</span> |
| <span class="source-line-no">960</span><span id="line-960"> UNKNOWN</span> |
| <span class="source-line-no">961</span><span id="line-961"> }</span> |
| <span class="source-line-no">962</span><span id="line-962"></span> |
| <span class="source-line-no">963</span><span id="line-963"> /** Returns whether the server is online, dead, or unknown. */</span> |
| <span class="source-line-no">964</span><span id="line-964"> public synchronized ServerLiveState isServerKnownAndOnline(ServerName serverName) {</span> |
| <span class="source-line-no">965</span><span id="line-965"> return onlineServers.containsKey(serverName)</span> |
| <span class="source-line-no">966</span><span id="line-966"> ? ServerLiveState.LIVE</span> |
| <span class="source-line-no">967</span><span id="line-967"> : (deadservers.isDeadServer(serverName) ? ServerLiveState.DEAD : ServerLiveState.UNKNOWN);</span> |
| <span class="source-line-no">968</span><span id="line-968"> }</span> |
| <span class="source-line-no">969</span><span id="line-969"></span> |
| <span class="source-line-no">970</span><span id="line-970"> /**</span> |
| <span class="source-line-no">971</span><span id="line-971"> * Check if a server is known to be dead. A server can be online, or known to be dead, or unknown</span> |
| <span class="source-line-no">972</span><span id="line-972"> * to this manager (i.e., not online, not known to be dead either; it is simply not tracked by the</span> |
| <span class="source-line-no">973</span><span id="line-973"> * master any more, for example, a very old previous instance).</span> |
| <span class="source-line-no">974</span><span id="line-974"> */</span> |
| <span class="source-line-no">975</span><span id="line-975"> public synchronized boolean isServerDead(ServerName serverName) {</span> |
| <span class="source-line-no">976</span><span id="line-976"> return serverName == null || deadservers.isDeadServer(serverName);</span> |
| <span class="source-line-no">977</span><span id="line-977"> }</span> |
| <span class="source-line-no">978</span><span id="line-978"></span> |
| <span class="source-line-no">979</span><span id="line-979"> /**</span> |
| <span class="source-line-no">980</span><span id="line-980"> * Check if a server is unknown. A server can be online, or known to be dead, or unknown to this</span> |
| <span class="source-line-no">981</span><span id="line-981"> * manager (i.e., not online, not known to be dead either; it is simply not tracked by the master</span> |
| <span class="source-line-no">982</span><span id="line-982"> * any more, for example, a very old previous instance).</span> |
| <span class="source-line-no">983</span><span id="line-983"> */</span> |
| <span class="source-line-no">984</span><span id="line-984"> public boolean isServerUnknown(ServerName serverName) {</span> |
| <span class="source-line-no">985</span><span id="line-985"> return serverName == null</span> |
| <span class="source-line-no">986</span><span id="line-986"> || (!onlineServers.containsKey(serverName) && !deadservers.isDeadServer(serverName));</span> |
| <span class="source-line-no">987</span><span id="line-987"> }</span> |
| <span class="source-line-no">988</span><span id="line-988"></span> |
| <span class="source-line-no">989</span><span id="line-989"> public void shutdownCluster() {</span> |
| <span class="source-line-no">990</span><span id="line-990"> String statusStr = "Cluster shutdown requested of master=" + this.master.getServerName();</span> |
| <span class="source-line-no">991</span><span id="line-991"> LOG.info(statusStr);</span> |
| <span class="source-line-no">992</span><span id="line-992"> this.clusterShutdown.set(true);</span> |
| <span class="source-line-no">993</span><span id="line-993"> if (onlineServers.isEmpty()) {</span> |
| <span class="source-line-no">994</span><span id="line-994"> // we do not synchronize here so this may cause a double stop, but not a big deal</span> |
| <span class="source-line-no">995</span><span id="line-995"> master.stop("OnlineServer=0 right after cluster shutdown set");</span> |
| <span class="source-line-no">996</span><span id="line-996"> }</span> |
| <span class="source-line-no">997</span><span id="line-997"> }</span> |
| <span class="source-line-no">998</span><span id="line-998"></span> |
| <span class="source-line-no">999</span><span id="line-999"> public boolean isClusterShutdown() {</span> |
| <span class="source-line-no">1000</span><span id="line-1000"> return this.clusterShutdown.get();</span> |
| <span class="source-line-no">1001</span><span id="line-1001"> }</span> |
| <span class="source-line-no">1002</span><span id="line-1002"></span> |
| <span class="source-line-no">1003</span><span id="line-1003"> /**</span> |
| <span class="source-line-no">1004</span><span id="line-1004"> * start chore in ServerManager</span> |
| <span class="source-line-no">1005</span><span id="line-1005"> */</span> |
| <span class="source-line-no">1006</span><span id="line-1006"> public void startChore() {</span> |
| <span class="source-line-no">1007</span><span id="line-1007"> Configuration c = master.getConfiguration();</span> |
| <span class="source-line-no">1008</span><span id="line-1008"> if (persistFlushedSequenceId) {</span> |
| <span class="source-line-no">1009</span><span id="line-1009"> new Thread(() -> {</span> |
| <span class="source-line-no">1010</span><span id="line-1010"> // after AM#loadMeta, RegionStates should be loaded, and some regions are</span> |
| <span class="source-line-no">1011</span><span id="line-1011"> // deleted by drop/split/merge during removeDeletedRegionFromLoadedFlushedSequenceIds,</span> |
| <span class="source-line-no">1012</span><span id="line-1012"> // but these deleted regions are not added back to RegionStates,</span> |
| <span class="source-line-no">1013</span><span id="line-1013"> // so we can safely remove deleted regions.</span> |
| <span class="source-line-no">1014</span><span id="line-1014"> removeDeletedRegionFromLoadedFlushedSequenceIds();</span> |
| <span class="source-line-no">1015</span><span id="line-1015"> }, "RemoveDeletedRegionSyncThread").start();</span> |
| <span class="source-line-no">1016</span><span id="line-1016"> int flushPeriod =</span> |
| <span class="source-line-no">1017</span><span id="line-1017"> c.getInt(FLUSHEDSEQUENCEID_FLUSHER_INTERVAL, FLUSHEDSEQUENCEID_FLUSHER_INTERVAL_DEFAULT);</span> |
| <span class="source-line-no">1018</span><span id="line-1018"> flushedSeqIdFlusher = new FlushedSequenceIdFlusher("FlushedSequenceIdFlusher", flushPeriod);</span> |
| <span class="source-line-no">1019</span><span id="line-1019"> master.getChoreService().scheduleChore(flushedSeqIdFlusher);</span> |
| <span class="source-line-no">1020</span><span id="line-1020"> }</span> |
| <span class="source-line-no">1021</span><span id="line-1021"> }</span> |
| <span class="source-line-no">1022</span><span id="line-1022"></span> |
| <span class="source-line-no">1023</span><span id="line-1023"> /**</span> |
| <span class="source-line-no">1024</span><span id="line-1024"> * Stop the ServerManager.</span> |
| <span class="source-line-no">1025</span><span id="line-1025"> */</span> |
| <span class="source-line-no">1026</span><span id="line-1026"> public void stop() {</span> |
| <span class="source-line-no">1027</span><span id="line-1027"> if (flushedSeqIdFlusher != null) {</span> |
| <span class="source-line-no">1028</span><span id="line-1028"> flushedSeqIdFlusher.shutdown();</span> |
| <span class="source-line-no">1029</span><span id="line-1029"> }</span> |
| <span class="source-line-no">1030</span><span id="line-1030"> if (persistFlushedSequenceId) {</span> |
| <span class="source-line-no">1031</span><span id="line-1031"> try {</span> |
| <span class="source-line-no">1032</span><span id="line-1032"> persistRegionLastFlushedSequenceIds();</span> |
| <span class="source-line-no">1033</span><span id="line-1033"> } catch (IOException e) {</span> |
| <span class="source-line-no">1034</span><span id="line-1034"> LOG.warn("Failed to persist last flushed sequence id of regions" + " to file system", e);</span> |
| <span class="source-line-no">1035</span><span id="line-1035"> }</span> |
| <span class="source-line-no">1036</span><span id="line-1036"> }</span> |
| <span class="source-line-no">1037</span><span id="line-1037"> }</span> |
| <span class="source-line-no">1038</span><span id="line-1038"></span> |
| <span class="source-line-no">1039</span><span id="line-1039"> /**</span> |
| <span class="source-line-no">1040</span><span id="line-1040"> * Creates a list of possible destinations for a region. It contains the online servers, but not</span> |
| <span class="source-line-no">1041</span><span id="line-1041"> * the draining or dying servers.</span> |
| <span class="source-line-no">1042</span><span id="line-1042"> * @param serversToExclude can be null if there is no server to exclude</span> |
| <span class="source-line-no">1043</span><span id="line-1043"> */</span> |
| <span class="source-line-no">1044</span><span id="line-1044"> public List<ServerName> createDestinationServersList(final List<ServerName> serversToExclude) {</span> |
| <span class="source-line-no">1045</span><span id="line-1045"> Set<ServerName> destServers = new HashSet<>();</span> |
| <span class="source-line-no">1046</span><span id="line-1046"> onlineServers.forEach((sn, sm) -> {</span> |
| <span class="source-line-no">1047</span><span id="line-1047"> if (sm.getLastReportTimestamp() > 0) {</span> |
| <span class="source-line-no">1048</span><span id="line-1048"> // This means we have already called regionServerReport at least once, then let's include</span> |
| <span class="source-line-no">1049</span><span id="line-1049"> // this server for region assignment. This is an optimization to avoid assigning regions to</span> |
| <span class="source-line-no">1050</span><span id="line-1050"> // an uninitialized server. See HBASE-25032 for more details.</span> |
| <span class="source-line-no">1051</span><span id="line-1051"> destServers.add(sn);</span> |
| <span class="source-line-no">1052</span><span id="line-1052"> }</span> |
| <span class="source-line-no">1053</span><span id="line-1053"> });</span> |
| <span class="source-line-no">1054</span><span id="line-1054"></span> |
| <span class="source-line-no">1055</span><span id="line-1055"> if (serversToExclude != null) {</span> |
| <span class="source-line-no">1056</span><span id="line-1056"> destServers.removeAll(serversToExclude);</span> |
| <span class="source-line-no">1057</span><span id="line-1057"> }</span> |
| <span class="source-line-no">1058</span><span id="line-1058"></span> |
| <span class="source-line-no">1059</span><span id="line-1059"> // Loop through the draining server list and remove them from the server list</span> |
| <span class="source-line-no">1060</span><span id="line-1060"> final List<ServerName> drainingServersCopy = getDrainingServersList();</span> |
| <span class="source-line-no">1061</span><span id="line-1061"> destServers.removeAll(drainingServersCopy);</span> |
| <span class="source-line-no">1062</span><span id="line-1062"></span> |
| <span class="source-line-no">1063</span><span id="line-1063"> return new ArrayList<>(destServers);</span> |
| <span class="source-line-no">1064</span><span id="line-1064"> }</span> |
| <span class="source-line-no">1065</span><span id="line-1065"></span> |
| <span class="source-line-no">1066</span><span id="line-1066"> /**</span> |
| <span class="source-line-no">1067</span><span id="line-1067"> * Calls {@link #createDestinationServersList} without server to exclude.</span> |
| <span class="source-line-no">1068</span><span id="line-1068"> */</span> |
| <span class="source-line-no">1069</span><span id="line-1069"> public List<ServerName> createDestinationServersList() {</span> |
| <span class="source-line-no">1070</span><span id="line-1070"> return createDestinationServersList(null);</span> |
| <span class="source-line-no">1071</span><span id="line-1071"> }</span> |
| <span class="source-line-no">1072</span><span id="line-1072"></span> |
| <span class="source-line-no">1073</span><span id="line-1073"> /**</span> |
| <span class="source-line-no">1074</span><span id="line-1074"> * Clears any dead server with the same host name and port as any online server.</span> |
| <span class="source-line-no">1075</span><span id="line-1075"> */</span> |
| <span class="source-line-no">1076</span><span id="line-1076"> void clearDeadServersWithSameHostNameAndPortOfOnlineServer() {</span> |
| <span class="source-line-no">1077</span><span id="line-1077"> for (ServerName serverName : getOnlineServersList()) {</span> |
| <span class="source-line-no">1078</span><span id="line-1078"> deadservers.cleanAllPreviousInstances(serverName);</span> |
| <span class="source-line-no">1079</span><span id="line-1079"> }</span> |
| <span class="source-line-no">1080</span><span id="line-1080"> }</span> |
| <span class="source-line-no">1081</span><span id="line-1081"></span> |
| <span class="source-line-no">1082</span><span id="line-1082"> /**</span> |
| <span class="source-line-no">1083</span><span id="line-1083"> * Called by delete table and similar to notify the ServerManager that a region was removed.</span> |
| <span class="source-line-no">1084</span><span id="line-1084"> */</span> |
| <span class="source-line-no">1085</span><span id="line-1085"> public void removeRegion(final RegionInfo regionInfo) {</span> |
| <span class="source-line-no">1086</span><span id="line-1086"> final byte[] encodedName = regionInfo.getEncodedNameAsBytes();</span> |
| <span class="source-line-no">1087</span><span id="line-1087"> storeFlushedSequenceIdsByRegion.remove(encodedName);</span> |
| <span class="source-line-no">1088</span><span id="line-1088"> flushedSequenceIdByRegion.remove(encodedName);</span> |
| <span class="source-line-no">1089</span><span id="line-1089"> }</span> |
| <span class="source-line-no">1090</span><span id="line-1090"></span> |
| <span class="source-line-no">1091</span><span id="line-1091"> public boolean isRegionInServerManagerStates(final RegionInfo hri) {</span> |
| <span class="source-line-no">1092</span><span id="line-1092"> final byte[] encodedName = hri.getEncodedNameAsBytes();</span> |
| <span class="source-line-no">1093</span><span id="line-1093"> return (storeFlushedSequenceIdsByRegion.containsKey(encodedName)</span> |
| <span class="source-line-no">1094</span><span id="line-1094"> || flushedSequenceIdByRegion.containsKey(encodedName));</span> |
| <span class="source-line-no">1095</span><span id="line-1095"> }</span> |
| <span class="source-line-no">1096</span><span id="line-1096"></span> |
| <span class="source-line-no">1097</span><span id="line-1097"> /**</span> |
| <span class="source-line-no">1098</span><span id="line-1098"> * Called by delete table and similar to notify the ServerManager that regions were removed.</span> |
| <span class="source-line-no">1099</span><span id="line-1099"> */</span> |
| <span class="source-line-no">1100</span><span id="line-1100"> public void removeRegions(final List<RegionInfo> regions) {</span> |
| <span class="source-line-no">1101</span><span id="line-1101"> for (RegionInfo hri : regions) {</span> |
| <span class="source-line-no">1102</span><span id="line-1102"> removeRegion(hri);</span> |
| <span class="source-line-no">1103</span><span id="line-1103"> }</span> |
| <span class="source-line-no">1104</span><span id="line-1104"> }</span> |
| <span class="source-line-no">1105</span><span id="line-1105"></span> |
| <span class="source-line-no">1106</span><span id="line-1106"> /**</span> |
| <span class="source-line-no">1107</span><span id="line-1107"> * May return 0 when server is not online.</span> |
| <span class="source-line-no">1108</span><span id="line-1108"> */</span> |
| <span class="source-line-no">1109</span><span id="line-1109"> public int getVersionNumber(ServerName serverName) {</span> |
| <span class="source-line-no">1110</span><span id="line-1110"> ServerMetrics serverMetrics = onlineServers.get(serverName);</span> |
| <span class="source-line-no">1111</span><span id="line-1111"> return serverMetrics != null ? serverMetrics.getVersionNumber() : 0;</span> |
| <span class="source-line-no">1112</span><span id="line-1112"> }</span> |
| <span class="source-line-no">1113</span><span id="line-1113"></span> |
| <span class="source-line-no">1114</span><span id="line-1114"> /**</span> |
| <span class="source-line-no">1115</span><span id="line-1115"> * May return "0.0.0" when server is not online</span> |
| <span class="source-line-no">1116</span><span id="line-1116"> */</span> |
| <span class="source-line-no">1117</span><span id="line-1117"> public String getVersion(ServerName serverName) {</span> |
| <span class="source-line-no">1118</span><span id="line-1118"> ServerMetrics serverMetrics = onlineServers.get(serverName);</span> |
| <span class="source-line-no">1119</span><span id="line-1119"> return serverMetrics != null ? serverMetrics.getVersion() : "0.0.0";</span> |
| <span class="source-line-no">1120</span><span id="line-1120"> }</span> |
| <span class="source-line-no">1121</span><span id="line-1121"></span> |
| <span class="source-line-no">1122</span><span id="line-1122"> public int getInfoPort(ServerName serverName) {</span> |
| <span class="source-line-no">1123</span><span id="line-1123"> ServerMetrics serverMetrics = onlineServers.get(serverName);</span> |
| <span class="source-line-no">1124</span><span id="line-1124"> return serverMetrics != null ? serverMetrics.getInfoServerPort() : 0;</span> |
| <span class="source-line-no">1125</span><span id="line-1125"> }</span> |
| <span class="source-line-no">1126</span><span id="line-1126"></span> |
| <span class="source-line-no">1127</span><span id="line-1127"> /**</span> |
| <span class="source-line-no">1128</span><span id="line-1128"> * Persist last flushed sequence id of each region to HDFS</span> |
| <span class="source-line-no">1129</span><span id="line-1129"> * @throws IOException if persisting to HDFS fails</span> |
| <span class="source-line-no">1130</span><span id="line-1130"> */</span> |
| <span class="source-line-no">1131</span><span id="line-1131"> private void persistRegionLastFlushedSequenceIds() throws IOException {</span> |
| <span class="source-line-no">1132</span><span id="line-1132"> if (isFlushSeqIdPersistInProgress) {</span> |
| <span class="source-line-no">1133</span><span id="line-1133"> return;</span> |
| <span class="source-line-no">1134</span><span id="line-1134"> }</span> |
| <span class="source-line-no">1135</span><span id="line-1135"> isFlushSeqIdPersistInProgress = true;</span> |
| <span class="source-line-no">1136</span><span id="line-1136"> try {</span> |
| <span class="source-line-no">1137</span><span id="line-1137"> Configuration conf = master.getConfiguration();</span> |
| <span class="source-line-no">1138</span><span id="line-1138"> Path rootDir = CommonFSUtils.getRootDir(conf);</span> |
| <span class="source-line-no">1139</span><span id="line-1139"> Path lastFlushedSeqIdPath = new Path(rootDir, LAST_FLUSHED_SEQ_ID_FILE);</span> |
| <span class="source-line-no">1140</span><span id="line-1140"> FileSystem fs = FileSystem.get(conf);</span> |
| <span class="source-line-no">1141</span><span id="line-1141"> if (fs.exists(lastFlushedSeqIdPath)) {</span> |
| <span class="source-line-no">1142</span><span id="line-1142"> LOG.info("Rewriting .lastflushedseqids file at: " + lastFlushedSeqIdPath);</span> |
| <span class="source-line-no">1143</span><span id="line-1143"> if (!fs.delete(lastFlushedSeqIdPath, false)) {</span> |
| <span class="source-line-no">1144</span><span id="line-1144"> throw new IOException("Unable to remove existing " + lastFlushedSeqIdPath);</span> |
| <span class="source-line-no">1145</span><span id="line-1145"> }</span> |
| <span class="source-line-no">1146</span><span id="line-1146"> } else {</span> |
| <span class="source-line-no">1147</span><span id="line-1147"> LOG.info("Writing .lastflushedseqids file at: " + lastFlushedSeqIdPath);</span> |
| <span class="source-line-no">1148</span><span id="line-1148"> }</span> |
| <span class="source-line-no">1149</span><span id="line-1149"> FSDataOutputStream out = fs.create(lastFlushedSeqIdPath);</span> |
| <span class="source-line-no">1150</span><span id="line-1150"> FlushedSequenceId.Builder flushedSequenceIdBuilder = FlushedSequenceId.newBuilder();</span> |
| <span class="source-line-no">1151</span><span id="line-1151"> try {</span> |
| <span class="source-line-no">1152</span><span id="line-1152"> for (Entry<byte[], Long> entry : flushedSequenceIdByRegion.entrySet()) {</span> |
| <span class="source-line-no">1153</span><span id="line-1153"> FlushedRegionSequenceId.Builder flushedRegionSequenceIdBuilder =</span> |
| <span class="source-line-no">1154</span><span id="line-1154"> FlushedRegionSequenceId.newBuilder();</span> |
| <span class="source-line-no">1155</span><span id="line-1155"> flushedRegionSequenceIdBuilder.setRegionEncodedName(ByteString.copyFrom(entry.getKey()));</span> |
| <span class="source-line-no">1156</span><span id="line-1156"> flushedRegionSequenceIdBuilder.setSeqId(entry.getValue());</span> |
| <span class="source-line-no">1157</span><span id="line-1157"> ConcurrentNavigableMap<byte[], Long> storeSeqIds =</span> |
| <span class="source-line-no">1158</span><span id="line-1158"> storeFlushedSequenceIdsByRegion.get(entry.getKey());</span> |
| <span class="source-line-no">1159</span><span id="line-1159"> if (storeSeqIds != null) {</span> |
| <span class="source-line-no">1160</span><span id="line-1160"> for (Entry<byte[], Long> store : storeSeqIds.entrySet()) {</span> |
| <span class="source-line-no">1161</span><span id="line-1161"> FlushedStoreSequenceId.Builder flushedStoreSequenceIdBuilder =</span> |
| <span class="source-line-no">1162</span><span id="line-1162"> FlushedStoreSequenceId.newBuilder();</span> |
| <span class="source-line-no">1163</span><span id="line-1163"> flushedStoreSequenceIdBuilder.setFamily(ByteString.copyFrom(store.getKey()));</span> |
| <span class="source-line-no">1164</span><span id="line-1164"> flushedStoreSequenceIdBuilder.setSeqId(store.getValue());</span> |
| <span class="source-line-no">1165</span><span id="line-1165"> flushedRegionSequenceIdBuilder.addStores(flushedStoreSequenceIdBuilder);</span> |
| <span class="source-line-no">1166</span><span id="line-1166"> }</span> |
| <span class="source-line-no">1167</span><span id="line-1167"> }</span> |
| <span class="source-line-no">1168</span><span id="line-1168"> flushedSequenceIdBuilder.addRegionSequenceId(flushedRegionSequenceIdBuilder);</span> |
| <span class="source-line-no">1169</span><span id="line-1169"> }</span> |
| <span class="source-line-no">1170</span><span id="line-1170"> flushedSequenceIdBuilder.build().writeDelimitedTo(out);</span> |
| <span class="source-line-no">1171</span><span id="line-1171"> } finally {</span> |
| <span class="source-line-no">1172</span><span id="line-1172"> if (out != null) {</span> |
| <span class="source-line-no">1173</span><span id="line-1173"> out.close();</span> |
| <span class="source-line-no">1174</span><span id="line-1174"> }</span> |
| <span class="source-line-no">1175</span><span id="line-1175"> }</span> |
| <span class="source-line-no">1176</span><span id="line-1176"> } finally {</span> |
| <span class="source-line-no">1177</span><span id="line-1177"> isFlushSeqIdPersistInProgress = false;</span> |
| <span class="source-line-no">1178</span><span id="line-1178"> }</span> |
| <span class="source-line-no">1179</span><span id="line-1179"> }</span> |
| <span class="source-line-no">1180</span><span id="line-1180"></span> |
| <span class="source-line-no">1181</span><span id="line-1181">  /**</span> |
| <span class="source-line-no">1182</span><span id="line-1182">   * Load the last flushed sequence id of each region, and of each of its stores, from the</span> |
| <span class="source-line-no">1183</span><span id="line-1183">   * LAST_FLUSHED_SEQ_ID_FILE under the root directory. Returns without reading anything when</span> |
| <span class="source-line-no">1184</span><span id="line-1184">   * persistFlushedSequenceId is false, or when the file is missing or empty. Region-level ids</span> |
| <span class="source-line-no">1185</span><span id="line-1185">   * already held in memory are kept (putIfAbsent); store-level ids from the file overwrite the</span> |
| <span class="source-line-no">1186</span><span id="line-1186">   * in-memory value for the same family.</span> |
| <span class="source-line-no">1187</span><span id="line-1187">   * @throws IOException if the file system calls or the parsing of the file fail</span> |
| <span class="source-line-no">1188</span><span id="line-1188">   */</span> |
| <span class="source-line-no">1189</span><span id="line-1189">  public void loadLastFlushedSequenceIds() throws IOException {</span> |
| <span class="source-line-no">1190</span><span id="line-1190">    if (!persistFlushedSequenceId) {</span> |
| <span class="source-line-no">1191</span><span id="line-1191">      return;</span> |
| <span class="source-line-no">1192</span><span id="line-1192">    }</span> |
| <span class="source-line-no">1193</span><span id="line-1193">    Configuration conf = master.getConfiguration();</span> |
| <span class="source-line-no">1194</span><span id="line-1194">    Path rootDir = CommonFSUtils.getRootDir(conf);</span> |
| <span class="source-line-no">1195</span><span id="line-1195">    Path lastFlushedSeqIdPath = new Path(rootDir, LAST_FLUSHED_SEQ_ID_FILE);</span> |
| <span class="source-line-no">1196</span><span id="line-1196">    FileSystem fs = FileSystem.get(conf);</span> |
| <span class="source-line-no">1197</span><span id="line-1197">    if (!fs.exists(lastFlushedSeqIdPath)) {</span> |
| <span class="source-line-no">1198</span><span id="line-1198">      LOG.info("No .lastflushedseqids found at {} will record last flushed sequence id"</span> |
| <span class="source-line-no">1199</span><span id="line-1199">        + " for regions by regionserver report all over again", lastFlushedSeqIdPath);</span> |
| <span class="source-line-no">1200</span><span id="line-1200">      return;</span> |
| <span class="source-line-no">1201</span><span id="line-1201">    }</span> |
| <span class="source-line-no">1202</span><span id="line-1202">    LOG.info("begin to load .lastflushedseqids at {}", lastFlushedSeqIdPath);</span> |
| <span class="source-line-no">1203</span><span id="line-1203">    // try-with-resources closes the stream on every exit path, including the early return.</span> |
| <span class="source-line-no">1204</span><span id="line-1204">    try (FSDataInputStream in = fs.open(lastFlushedSeqIdPath)) {</span> |
| <span class="source-line-no">1205</span><span id="line-1205">      FlushedSequenceId flushedSequenceId = FlushedSequenceId.parseDelimitedFrom(in);</span> |
| <span class="source-line-no">1206</span><span id="line-1206">      if (flushedSequenceId == null) {</span> |
| <span class="source-line-no">1207</span><span id="line-1207">        LOG.info(".lastflushedseqids found at {} is empty", lastFlushedSeqIdPath);</span> |
| <span class="source-line-no">1208</span><span id="line-1208">        return;</span> |
| <span class="source-line-no">1209</span><span id="line-1209">      }</span> |
| <span class="source-line-no">1210</span><span id="line-1210">      for (FlushedRegionSequenceId flushedRegionSequenceId : flushedSequenceId</span> |
| <span class="source-line-no">1211</span><span id="line-1211">        .getRegionSequenceIdList()) {</span> |
| <span class="source-line-no">1212</span><span id="line-1212">        byte[] encodedRegionName = flushedRegionSequenceId.getRegionEncodedName().toByteArray();</span> |
| <span class="source-line-no">1213</span><span id="line-1213">        flushedSequenceIdByRegion.putIfAbsent(encodedRegionName,</span> |
| <span class="source-line-no">1214</span><span id="line-1214">          flushedRegionSequenceId.getSeqId());</span> |
| <span class="source-line-no">1215</span><span id="line-1215">        // Generated protobuf list getters never return null, so only emptiness is checked.</span> |
| <span class="source-line-no">1216</span><span id="line-1216">        if (flushedRegionSequenceId.getStoresList().isEmpty()) {</span> |
| <span class="source-line-no">1217</span><span id="line-1217">          continue;</span> |
| <span class="source-line-no">1218</span><span id="line-1218">        }</span> |
| <span class="source-line-no">1219</span><span id="line-1219">        ConcurrentNavigableMap&lt;byte[], Long&gt; storeFlushedSequenceId =</span> |
| <span class="source-line-no">1220</span><span id="line-1220">          computeIfAbsent(storeFlushedSequenceIdsByRegion, encodedRegionName,</span> |
| <span class="source-line-no">1221</span><span id="line-1221">            () -&gt; new ConcurrentSkipListMap&lt;&gt;(Bytes.BYTES_COMPARATOR));</span> |
| <span class="source-line-no">1222</span><span id="line-1222">        for (FlushedStoreSequenceId flushedStoreSequenceId : flushedRegionSequenceId</span> |
| <span class="source-line-no">1223</span><span id="line-1223">          .getStoresList()) {</span> |
| <span class="source-line-no">1224</span><span id="line-1224">          storeFlushedSequenceId.put(flushedStoreSequenceId.getFamily().toByteArray(),</span> |
| <span class="source-line-no">1225</span><span id="line-1225">            flushedStoreSequenceId.getSeqId());</span> |
| <span class="source-line-no">1226</span><span id="line-1226">        }</span> |
| <span class="source-line-no">1227</span><span id="line-1227">      }</span> |
| <span class="source-line-no">1228</span><span id="line-1228">    }</span> |
| <span class="source-line-no">1229</span><span id="line-1229">  }</span> |
| <span class="source-line-no">1230</span><span id="line-1230"></span> |
| <span class="source-line-no">1231</span><span id="line-1231">  /**</span> |
| <span class="source-line-no">1232</span><span id="line-1232">   * Regions may have been removed between the latest persist of FlushedSequenceIds and a master</span> |
| <span class="source-line-no">1233</span><span id="line-1233">   * abort. So after loading FlushedSequenceIds from file, and once meta is loaded, drop the</span> |
| <span class="source-line-no">1234</span><span id="line-1234">   * region-level and store-level ids of every region that RegionStates has no state for.</span> |
| <span class="source-line-no">1235</span><span id="line-1235">   */</span> |
| <span class="source-line-no">1236</span><span id="line-1236">  public void removeDeletedRegionFromLoadedFlushedSequenceIds() {</span> |
| <span class="source-line-no">1237</span><span id="line-1237">    RegionStates regionStates = master.getAssignmentManager().getRegionStates();</span> |
| <span class="source-line-no">1238</span><span id="line-1238">    Iterator&lt;byte[]&gt; it = flushedSequenceIdByRegion.keySet().iterator();</span> |
| <span class="source-line-no">1239</span><span id="line-1239">    while (it.hasNext()) {</span> |
| <span class="source-line-no">1240</span><span id="line-1240">      byte[] regionEncodedName = it.next();</span> |
| <span class="source-line-no">1241</span><span id="line-1241">      if (regionStates.getRegionState(Bytes.toStringBinary(regionEncodedName)) == null) {</span> |
| <span class="source-line-no">1242</span><span id="line-1242">        it.remove();</span> |
| <span class="source-line-no">1243</span><span id="line-1243">        storeFlushedSequenceIdsByRegion.remove(regionEncodedName);</span> |
| <span class="source-line-no">1244</span><span id="line-1244">      }</span> |
| <span class="source-line-no">1245</span><span id="line-1245">    }</span> |
| <span class="source-line-no">1246</span><span id="line-1246">  }</span> |
| <span class="source-line-no">1247</span><span id="line-1247"></span> |
| <span class="source-line-no">1248</span><span id="line-1248">  /** Chore persisting last flushed sequence ids; an IOException is only logged at debug. */</span> |
| <span class="source-line-no">1249</span><span id="line-1249">  private class FlushedSequenceIdFlusher extends ScheduledChore {</span> |
| <span class="source-line-no">1250</span><span id="line-1250">    public FlushedSequenceIdFlusher(String name, int period) {</span> |
| <span class="source-line-no">1251</span><span id="line-1251">      super(name, master, period, 60 * 1000); // delay one minute before first execute</span> |
| <span class="source-line-no">1252</span><span id="line-1252">    }</span> |
| <span class="source-line-no">1253</span><span id="line-1253"></span> |
| <span class="source-line-no">1254</span><span id="line-1254">    @Override</span> |
| <span class="source-line-no">1255</span><span id="line-1255">    protected void chore() {</span> |
| <span class="source-line-no">1256</span><span id="line-1256">      try {</span> |
| <span class="source-line-no">1257</span><span id="line-1257">        persistRegionLastFlushedSequenceIds();</span> |
| <span class="source-line-no">1258</span><span id="line-1258">      } catch (IOException ioe) {</span> |
| <span class="source-line-no">1259</span><span id="line-1259">        LOG.debug("Failed to persist last flushed sequence id of regions to file system", ioe);</span> |
| <span class="source-line-no">1260</span><span id="line-1260">      }</span> |
| <span class="source-line-no">1261</span><span id="line-1261">    }</span> |
| <span class="source-line-no">1262</span><span id="line-1262">  }</span> |
| <span class="source-line-no">1263</span><span id="line-1263">}</span> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| </pre> |
| </div> |
| </main> |
| </body> |
| </html> |