blob: 2a360205e0538e0ff0bd79bac2be86d2a1dbb4f0 [file] [log] [blame]
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.bookkeeper.bookie;
import static java.lang.Long.max;
import static org.apache.bookkeeper.bookie.BookKeeperServerStats.LEDGER_CACHE_HIT;
import static org.apache.bookkeeper.bookie.BookKeeperServerStats.LEDGER_CACHE_MISS;
import static org.apache.bookkeeper.bookie.BookKeeperServerStats.LEDGER_CACHE_READ_PAGE;
import static org.apache.bookkeeper.bookie.BookKeeperServerStats.NUM_INDEX_PAGES;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.bookkeeper.bookie.stats.IndexInMemPageMgrStats;
import org.apache.bookkeeper.conf.ServerConfiguration;
import org.apache.bookkeeper.stats.Counter;
import org.apache.bookkeeper.stats.Gauge;
import org.apache.bookkeeper.stats.OpStatsLogger;
import org.apache.bookkeeper.stats.StatsLogger;
import org.apache.bookkeeper.util.DirectMemoryUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
// In-memory page manager for the ledger index: caches LedgerEntryPage objects per ledger
// and uses an IndexPersistenceMgr to load and flush them.
class IndexInMemPageMgr {
private static final Logger LOG = LoggerFactory.getLogger(IndexInMemPageMgr.class);
// Shared empty map. It is passed as the expected value to pages.remove(key, value) so that
// a ledger is dropped from the page table only if its page map compares equal to an empty map.
private static final ConcurrentHashMap<Long, LedgerEntryPage> EMPTY_PAGE_MAP =
new ConcurrentHashMap<Long, LedgerEntryPage>();
// Holds the in-memory pages together with the clean-page LRU map and the free-page queue,
// and is registered as the LEPStateChangeCallback of the pages it manages.
private static class InMemPageCollection implements LEPStateChangeCallback {
// Page table: ledger id -> (id of the first entry of the page -> page).
final ConcurrentMap<Long, ConcurrentMap<Long, LedgerEntryPage>> pages;
// Pages that are clean and not in use, keyed by EntryKey; candidates for reclaim.
final Map<EntryKey, LedgerEntryPage> lruCleanPageMap;
// Queue of reusable pages; grabCleanPage polls it before scanning the LRU map.
final ConcurrentLinkedQueue<LedgerEntryPage> listOfFreePages;
// Stats
private final IndexInMemPageMgrStats inMemPageMgrStats;
// Creates empty collections and the stats object bound to the given stats logger.
public InMemPageCollection(StatsLogger statsLogger) {
pages = new ConcurrentHashMap<>();
// The third LinkedHashMap argument (true) selects access order, so iteration runs from the
// least recently accessed entry to the most recently accessed one.
lruCleanPageMap =
Collections.synchronizedMap(new LinkedHashMap<EntryKey, LedgerEntryPage>(16, 0.75f, true));
listOfFreePages = new ConcurrentLinkedQueue<LedgerEntryPage>();
inMemPageMgrStats = new IndexInMemPageMgrStats(statsLogger);
* Retrieve the LedgerEntryPage corresponding to the ledger and firstEntry.
* @param ledgerId Ledger id
* @param firstEntry Id of the first entry in the page
* @returns LedgerEntryPage if present
private LedgerEntryPage getPage(long ledgerId, long firstEntry) {
ConcurrentMap<Long, LedgerEntryPage> map = pages.get(ledgerId);
if (null != map) {
return map.get(firstEntry);
return null;
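/**
 * Add a LedgerEntryPage to the page map.
 *
 * @param lep Ledger Entry Page object
 * @return the page registered for the ledger and first entry of {@code lep}: {@code lep}
 *         itself if none was present, otherwise the page that was already there
 */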
// Inserts lep into the page table under (lep.getLedger(), lep.getFirstEntry()) unless a page
// is already registered for that key; the result is whichever page ends up in the table.
private LedgerEntryPage putPage(LedgerEntryPage lep) {
// Do a get here to avoid too many new ConcurrentHashMaps() as putIntoTable is called frequently.
ConcurrentMap<Long, LedgerEntryPage> map = pages.get(lep.getLedger());
if (null == map) {
ConcurrentMap<Long, LedgerEntryPage> mapToPut = new ConcurrentHashMap<Long, LedgerEntryPage>();
// putIfAbsent returns the map already registered for this ledger, or null if ours was stored.
map = pages.putIfAbsent(lep.getLedger(), mapToPut);
if (null == map) {
map = mapToPut;
// A null result here means lep was stored; non-null is the page that was already present.
LedgerEntryPage oldPage = map.putIfAbsent(lep.getFirstEntry(), lep);
if (null == oldPage) {
oldPage = lep;
// Also include this in the clean page map if it qualifies.
// Note: This is done for symmetry and correctness, however it should never
// get exercised since we shouldn't attempt a put without the page being in use
// NOTE(review): no statement follows the comment above in this view; the call that adds the
// page to the clean page map appears to be missing -- confirm against the full file.
return oldPage;
/**
 * Traverse the pages for a given ledger in memory and find the highest entry amongst these pages.
 *
 * @param ledgerId Ledger id
 * @return last entry in the in-memory pages
 */
// Scans the in-memory pages of ledgerId and tracks the largest getLastEntry() value seen.
// The result is 0 when the ledger has no pages in memory.
private long getLastEntryInMem(long ledgerId) {
long lastEntry = 0;
// Find the last entry in the cache
ConcurrentMap<Long, LedgerEntryPage> map = pages.get(ledgerId);
if (map != null) {
for (LedgerEntryPage lep: map.values()) {
// NOTE(review): the body of this check is not visible in this view; presumably pages whose
// maximum possible entry is below the current best are skipped -- confirm against the full file.
if (lep.getMaxPossibleEntry() < lastEntry) {
long highest = lep.getLastEntry();
if (highest > lastEntry) {
lastEntry = highest;
return lastEntry;
/**
 * Removes ledger entry pages for a given ledger.
 *
 * @param ledgerId Ledger id
 */
// Detaches all pages of ledgerId from the page table, then walks the detached pages.
private void removeEntriesForALedger(long ledgerId) {
// remove pages first to avoid page flushed when deleting file info
ConcurrentMap<Long, LedgerEntryPage> lPages = pages.remove(ledgerId);
if (null != lPages) {
for (Map.Entry<Long, LedgerEntryPage> pageEntry: lPages.entrySet()) {
LedgerEntryPage lep = pageEntry.getValue();
// NOTE(review): the per-page handling that should follow is not visible in this view.
/**
 * Gets the list of pages in memory that have been changed and hence need to
 * be written as a part of the flush operation that is being issued.
 *
 * @param ledgerId Ledger id
 * @return list of first-entry ids built for the flush, or {@code null} when the ledger
 *         has no pages in memory
 */
// Builds the first-entry list for a flush of ledgerId. Returns null when the ledger has no
// page map or the map is empty.
private LinkedList<Long> getFirstEntryListToBeFlushed(long ledgerId) {
ConcurrentMap<Long, LedgerEntryPage> pageMap = pages.get(ledgerId);
if (pageMap == null || pageMap.isEmpty()) {
return null;
LinkedList<Long> firstEntryList = new LinkedList<Long>();
for (ConcurrentMap.Entry<Long, LedgerEntryPage> entry: pageMap.entrySet()) {
LedgerEntryPage lep = entry.getValue();
// Clean pages are handled separately from dirty ones.
if (lep.isClean()) {
// NOTE(review): the statement guarded by this in-use check is not visible in this view.
if (!lep.inUse()) {
if (LOG.isTraceEnabled()) {
LOG.trace("Page is clean " + lep);
} else {
// NOTE(review): the statement that records a dirty page in firstEntryList is not visible
// in this view -- confirm against the full file.
return firstEntryList;
* Add the LedgerEntryPage to the clean page LRU map.
* @param lep Ledger Entry Page object
private void addToCleanPagesList(LedgerEntryPage lep) {
synchronized (lruCleanPageMap) {
if (lep.isClean() && !lep.inUse()) {
lruCleanPageMap.put(lep.getEntryKey(), lep);
/**
 * Remove the LedgerEntryPage from the clean page LRU map.
 *
 * @param lep Ledger Entry Page object
 */
// Acts on the clean page LRU map under its lock when the page is dirty or in use.
private void removeFromCleanPageList(LedgerEntryPage lep) {
synchronized (lruCleanPageMap) {
// The guard fires when the page is dirty or currently in use.
if (!lep.isClean() || lep.inUse()) {
// NOTE(review): the removal from lruCleanPageMap is not visible in this view.
* Get the set of active ledgers.
Set<Long> getActiveLedgers() {
return pages.keySet();
/**
 * Get a clean page and provision it for the specified ledger and firstEntry within the ledger.
 *
 * @param ledgerId Ledger id
 * @param firstEntry Id of the first entry in the page
 * @return a LedgerEntryPage provisioned for the given ledger and first entry, or
 *         {@code null} if no page could be reclaimed
 */
// Hands out a page for (ledgerId, firstEntry): first from the free-page queue, otherwise by
// reclaiming a clean, unused page found through the LRU map.
LedgerEntryPage grabCleanPage(long ledgerId, long firstEntry) {
// Fast path: reuse a page from the free-page queue if one is available.
LedgerEntryPage lep = listOfFreePages.poll();
if (null != lep) {
lep.setLedgerAndFirstEntry(ledgerId, firstEntry);
return lep;
// Otherwise keep trying to reclaim a page while the LRU map has entries.
while (lruCleanPageMap.size() > 0) {
lep = null;
// Iterating a Collections.synchronizedMap view requires holding the map's lock manually.
synchronized (lruCleanPageMap) {
Iterator<Map.Entry<EntryKey, LedgerEntryPage>> iterator = lruCleanPageMap.entrySet().iterator();
Map.Entry<EntryKey, LedgerEntryPage> entry = null;
while (iterator.hasNext()) {
// NOTE(review): the right-hand side of this assignment is missing in this view
// (presumably the iterator's next element) -- confirm against the full file.
entry =;
// Candidate pages must be clean and not in use.
if (entry.getValue().isClean() && !entry.getValue().inUse()) {
lep = entry.getValue();
// NOTE(review): whatever ends the scan after a candidate is found is not visible in this view.
if (null == lep) {
if (LOG.isDebugEnabled()) {
LOG.debug("Did not find eligible page in the first pass");
return null;
// We found a candidate page; let's see if we can reclaim it before it is re-used.
ConcurrentMap<Long, LedgerEntryPage> pageMap = pages.get(lep.getLedger());
// Remove from map only if nothing has changed since we checked this lep.
// It's possible for the ledger to have been deleted or the page to have already
// been reclaimed. The page map is the definitive source of information; if anything
// has changed we should leave this page alone and continue iterating to find
// another suitable page.
if ((null != pageMap) && (pageMap.remove(lep.getFirstEntry(), lep))) {
if (!lep.isClean()) {
// Someone wrote to this page while we were reclaiming it.
pageMap.put(lep.getFirstEntry(), lep);
lep = null;
} else {
// Do some bookkeeping on the page table
// (drops the ledger's entry only if its page map now equals the empty map).
pages.remove(lep.getLedger(), EMPTY_PAGE_MAP);
// We can now safely reset this lep and return it.
lep.setLedgerAndFirstEntry(ledgerId, firstEntry);
return lep;
} else {
lep = null;
return lep;
// Entry point for handing a page back to this collection.
// NOTE(review): only the guards are visible in this view; the statements they protect,
// and the statement that enqueues the page, are missing -- confirm against the full file.
public void addToListOfFreePages(LedgerEntryPage lep) {
// Special-cases a null page or a page that is still in use.
if ((null == lep) || lep.inUse()) {
if (null != lep) {
// State-change callbacks; presumably the LEPStateChangeCallback methods -- TODO confirm.
// NOTE(review): the bodies of these callbacks are not visible in this view; only the
// isDeleted() branch structure of onResetInUse survives.
public void onSetInUse(LedgerEntryPage lep) {
public void onResetInUse(LedgerEntryPage lep) {
// Deleted and non-deleted pages take different branches when a page stops being in use.
if (!lep.isDeleted()) {
} else {
public void onSetClean(LedgerEntryPage lep) {
public void onSetDirty(LedgerEntryPage lep) {
// Page geometry handed to every new LedgerEntryPage, plus the cap on allocated pages.
final int pageSize;
final int entriesPerPage;
// Upper bound checked against pageCount before a new page is allocated.
final int pageLimit;
// In-memory page table, clean-page LRU map and free-page queue.
final InMemPageCollection pageMapAndList;
// The number of pages that have actually been used
private final AtomicInteger pageCount = new AtomicInteger(0);
// The persistence manager that this page manager uses to
// flush and read pages
private final IndexPersistenceMgr indexPersistenceManager;
// Stats
private final Counter ledgerCacheHitCounter;
private final Counter ledgerCacheMissCounter;
private final OpStatsLogger ledgerCacheReadPageStats;
// Wires up the page manager: stores the page geometry and persistence manager, creates the
// in-memory page collection, derives the page limit and obtains the stats objects.
public IndexInMemPageMgr(int pageSize,
int entriesPerPage,
ServerConfiguration conf,
IndexPersistenceMgr indexPersistenceManager,
StatsLogger statsLogger) {
this.pageSize = pageSize;
this.entriesPerPage = entriesPerPage;
this.indexPersistenceManager = indexPersistenceManager;
this.pageMapAndList = new InMemPageCollection(statsLogger);
long maxDirectMemory = DirectMemoryUtils.maxDirectMemory();
// A non-positive configured page limit selects the computed default below.
if (conf.getPageLimit() <= 0) {
// By default, allocate a third of the direct memory to the page cache
this.pageLimit = (int) ((maxDirectMemory / 3) / this.pageSize);
} else {
this.pageLimit = conf.getPageLimit();
// NOTE(review): the next two lines look like the arguments of a logging call whose opening
// is missing in this view -- confirm against the full file.
}"maxDirectMemory = {}, pageSize = {}, pageLimit = {}",
maxDirectMemory, pageSize, pageLimit);
// Expose Stats
this.ledgerCacheHitCounter = statsLogger.getCounter(LEDGER_CACHE_HIT);
this.ledgerCacheMissCounter = statsLogger.getCounter(LEDGER_CACHE_MISS);
this.ledgerCacheReadPageStats = statsLogger.getOpStatsLogger(LEDGER_CACHE_READ_PAGE);
// Export sampled stats for index pages, ledgers.
// NOTE(review): the call that registers this gauge (NUM_INDEX_PAGES is imported but unused
// here) is not visible in this view.
new Gauge<Integer>() {
public Integer getDefaultValue() {
return 0;
// Samples the current number of used pages.
public Integer getSample() {
return getNumUsedPages();
* @return page size used in ledger cache
public int getPageSize() {
return pageSize;
* @return number of page used in ledger cache
private int getNumUsedPages() {
return pageCount.get();
/**
 * Get the ledger entry page for a given <i>pageEntry</i>.
 *
 * @param ledger
 *          ledger id
 * @param pageEntry
 *          first entry id of a given page
 * @return ledger entry page
 * @throws IOException
 *          if the page is not cached and loading it through the persistence manager fails
 */
// Returns the page for (ledger, pageEntry): from the cache when present, otherwise by
// grabbing and loading a page through grabLedgerEntryPage.
LedgerEntryPage getLedgerEntryPage(long ledger,
long pageEntry) throws IOException {
// onlyDirty = false: clean and dirty cached pages are both acceptable.
LedgerEntryPage lep = getLedgerEntryPageFromCache(ledger, pageEntry, false);
// NOTE(review): the stray ';' on the two branch lines below looks like the remainder of
// truncated statements (presumably cache miss / hit counter updates) -- confirm against the
// full file.
if (lep == null) {;
lep = grabLedgerEntryPage(ledger, pageEntry);
} else {;
return lep;
// Cache-only lookup of the page starting at firstEntry. With onlyDirty set, a clean page is
// reported as absent (null).
LedgerEntryPage getLedgerEntryPageFromCache(long ledger,
long firstEntry,
boolean onlyDirty) {
LedgerEntryPage lep = pageMapAndList.getPage(ledger, firstEntry);
if (onlyDirty && null != lep && lep.isClean()) {
return null;
// NOTE(review): callers pair this lookup with cleanup in finally blocks, so a statement
// marking the page as in use may be missing inside this check -- confirm against the full file.
if (null != lep) {
return lep;
/**
 * Grab ledger entry page whose first entry is <code>pageEntry</code>.
 * If the page did not exist before, we allocate a memory page.
 * Otherwise, we grab a clean page and read it from disk.
 *
 * @param ledger Ledger Id
 * @param pageEntry Start entry of this entry page.
 */
// Obtains a page for (ledger, pageEntry), lets the persistence manager fill it, and then
// publishes it in the page table. If another page is already registered for the same key,
// that page is returned instead.
private LedgerEntryPage grabLedgerEntryPage(long ledger, long pageEntry) throws IOException {
LedgerEntryPage lep = grabCleanPage(ledger, pageEntry);
try {
// should get the up to date page from the persistence manager
// before we put it into table otherwise we would put
// an empty page in it
Stopwatch readPageStopwatch = Stopwatch.createStarted();
boolean isNewPage = indexPersistenceManager.updatePage(lep);
// NOTE(review): the body of this check is not visible in this view; readPageStopwatch and
// ledgerCacheReadPageStats are otherwise unused here, so the read latency is presumably
// recorded in it -- confirm against the full file.
if (!isNewPage) {
} catch (IOException ie) {
// if we grab a clean page, but failed to update the page
// we should put this page in the free page list so that it
// can be reassigned to the next grabPage request
// NOTE(review): the statements that return the page to the free list are not visible here.
throw ie;
LedgerEntryPage oldLep;
// putPage returns the page that is actually registered; a different instance means
// another page was already registered for this key.
if (lep != (oldLep = pageMapAndList.putPage(lep))) {
// if we grab a clean page, but failed to put it in the cache
// we should put this page in the free page list so that it
// can be reassigned to the next grabPage request
// Increment the use count of the old lep because this is unexpected
// NOTE(review): the statements described by the two comments above are not visible here.
lep = oldLep;
return lep;
// Removes the in-memory pages of a ledger.
// NOTE(review): the body is not visible in this view; presumably it delegates to
// pageMapAndList.removeEntriesForALedger(ledgerId) -- confirm against the full file.
void removePagesForLedger(long ledgerId) {
long getLastEntryInMem(long ledgerId) {
return pageMapAndList.getLastEntryInMem(ledgerId);
// Produces a page for (ledger, entry): allocates a new one while the page limit allows it,
// otherwise tries to reclaim a clean page from the in-memory collection, looping until a
// page is obtained. Throws IllegalArgumentException if entry is not a multiple of
// entriesPerPage.
private LedgerEntryPage grabCleanPage(long ledger, long entry) throws IOException {
// A page must start on a page boundary.
if (entry % entriesPerPage != 0) {
throw new IllegalArgumentException(entry + " is not a multiple of " + entriesPerPage);
while (true) {
boolean canAllocate = false;
// Optimistically reserve a slot; allocation is allowed only while within pageLimit.
if (pageCount.incrementAndGet() <= pageLimit) {
canAllocate = true;
} else {
// NOTE(review): the body of this else branch is not visible in this view; presumably it
// undoes the increment above -- confirm against the full file.
if (canAllocate) {
LedgerEntryPage lep = new LedgerEntryPage(pageSize, entriesPerPage, pageMapAndList);
lep.setLedgerAndFirstEntry(ledger, entry);
return lep;
// Over the limit: try to reclaim an existing clean page instead.
LedgerEntryPage lep = pageMapAndList.grabCleanPage(ledger, entry);
if (null != lep) {
return lep;
// NOTE(review): the next two lines look like the arguments of a logging call whose opening
// is missing, and the forced flush that the message announces is not visible either.
}"Could not grab a clean page for ledger {}, entry {}, force flushing dirty ledgers.",
ledger, entry);
// Walks a snapshot of the currently active ledgers to flush them. The doAll flag is
// consulted inside the loop.
void flushOneOrMoreLedgers(boolean doAll) throws IOException {
// Copy the key set so the iteration is not affected by concurrent changes to the page table.
List<Long> ledgersToFlush = new ArrayList<>(pageMapAndList.getActiveLedgers());
for (Long potentiallyDirtyLedger : ledgersToFlush) {
// NOTE(review): the bodies of the try and catch blocks are not visible in this view;
// presumably the try calls flushSpecificLedger(potentiallyDirtyLedger) -- confirm against
// the full file.
try {
} catch (Bookie.NoLedgerException e) {
// NOTE(review): the body of this check is not visible; presumably the loop stops after the
// first ledger when doAll is false -- confirm against the full file.
if (!doAll) {
/**
 * Flush a specified ledger.
 *
 * @param ledger Ledger Id
 * @throws IOException if flushing the ledger's entries through the persistence manager fails
 */
// Flushes the pages of one ledger: obtains the first-entry list from the page collection,
// looks up the corresponding dirty pages in the cache and hands them to the persistence
// manager.
private void flushSpecificLedger(long ledger) throws IOException {
LinkedList<Long> firstEntryList = pageMapAndList.getFirstEntryListToBeFlushed(ledger);
// flush ledger index file header if necessary
// NOTE(review): the header flush announced by the comment above is not visible in this view.
if (null == firstEntryList || firstEntryList.size() == 0) {
if (LOG.isDebugEnabled()) {
LOG.debug("Nothing to flush for ledger {}.", ledger);
// nothing to do
// NOTE(review): the early exit that should follow "nothing to do" is not visible here.
// Now flush all the pages of a ledger
List<LedgerEntryPage> entries = new ArrayList<LedgerEntryPage>(firstEntryList.size());
try {
for (Long firstEntry: firstEntryList) {
// onlyDirty = true: pages that are clean by now are reported as null.
LedgerEntryPage lep = getLedgerEntryPageFromCache(ledger, firstEntry, true);
// NOTE(review): the statement that adds lep to entries is not visible in this view.
if (lep != null) {
indexPersistenceManager.flushLedgerEntries(ledger, entries);
} finally {
// NOTE(review): the per-page cleanup inside this loop is not visible in this view.
for (LedgerEntryPage lep: entries) {
// Stores offset for the given entry in the index page that contains it. A
// FileInfo.FileInfoDeletedException is translated into Bookie.NoLedgerException.
void putEntryOffset(long ledger, long entry, long offset) throws IOException {
// Position of the entry within its page.
int offsetInPage = (int) (entry % entriesPerPage);
// find the id of the first entry of the page that has the entry
// we are looking for
long pageEntry = entry - offsetInPage;
LedgerEntryPage lep = null;
try {
lep = getLedgerEntryPage(ledger, pageEntry);
assert lep != null;
// The second argument is the slot position scaled by the size of one index entry.
lep.setOffset(offset, offsetInPage * LedgerEntryPage.getIndexEntrySize());
} catch (FileInfo.FileInfoDeletedException e) {
throw new Bookie.NoLedgerException(ledger);
} finally {
// NOTE(review): the cleanup performed on a non-null page is not visible in this view.
if (null != lep) {
// Reads the offset stored for the given entry from the index page that contains it.
long getEntryOffset(long ledger, long entry) throws IOException {
// Position of the entry within its page.
int offsetInPage = (int) (entry % entriesPerPage);
// find the id of the first entry of the page that has the entry
// we are looking for
long pageEntry = entry - offsetInPage;
LedgerEntryPage lep = null;
try {
lep = getLedgerEntryPage(ledger, pageEntry);
// The argument is the slot position scaled by the size of one index entry.
return lep.getOffset(offsetInPage * LedgerEntryPage.getIndexEntrySize());
} finally {
// NOTE(review): the cleanup performed on a non-null page is not visible in this view.
if (lep != null) {
* Represents a page of the index.
private class PageEntriesImpl implements LedgerCache.PageEntries {
final long ledgerId;
final long initEntry;
PageEntriesImpl(long ledgerId, long initEntry) {
this.ledgerId = ledgerId;
this.initEntry = initEntry;
public LedgerEntryPage getLEP() throws IOException {
return getLedgerEntryPage(ledgerId, initEntry);
public long getFirstEntry() {
return initEntry;
public long getLastEntry() {
return initEntry + entriesPerPage;
/**
 * Iterable over index pages -- returns PageEntries rather than individual
 * entries because getEntries() above needs to be able to throw an IOException.
 */
// Walks the index of one ledger page by page, producing a PageEntriesImpl per page start.
private class PageEntriesIterableImpl implements LedgerCache.PageEntriesIterable {
final long ledgerId;
final FileInfoBackingCache.CachedFileInfo fi;
// Exclusive upper bound for the page start entries produced by the iterator.
final long totalEntries;
// First entry of the next page to hand out. This is shared state, so every iterator
// created from this object advances the same cursor.
long curEntry = 0;
PageEntriesIterableImpl(long ledgerId) throws IOException {
// NOTE(review): the left-hand side of the second assignment on the next line is missing in
// this view (presumably this.fi) -- confirm against the full file.
this.ledgerId = ledgerId; = indexPersistenceManager.getFileInfo(ledgerId, null);
// Larger of the entry count implied by the index file size (presumably whole pages on
// disk -- TODO confirm) and the last entry held in memory.
this.totalEntries = max(entriesPerPage * (fi.size() / pageSize), getLastEntryInMem(ledgerId));
public Iterator<LedgerCache.PageEntries> iterator() {
return new Iterator<LedgerCache.PageEntries>() {
public boolean hasNext() {
return curEntry < totalEntries;
// Hands out the page starting at curEntry and advances the cursor by one page.
public LedgerCache.PageEntries next() {
LedgerCache.PageEntries next = new PageEntriesImpl(ledgerId, curEntry);
curEntry += entriesPerPage;
return next;
// NOTE(review): the body of close() is not visible in this view; presumably it releases fi.
public void close() {
* Return iterator over pages for mapping entries to entry loggers.
* @param ledgerId
* @return Iterator over pages
* @throws IOException
public LedgerCache.PageEntriesIterable listEntries(long ledgerId) throws IOException {
return new PageEntriesIterableImpl(ledgerId);