blob: 0adbb77ba3d55973b7eabf66233614e7445f3435 [file] [log] [blame]
/*-------------------------------------------------------------------------
*
* HashVsHash.sq
* Cost model visualization
*
* Required software: Sysquake LE, which may be downloaded for free from
* http://www.calerga.com/products/Sysquake/index.html
* This script was developed using Sysquake 3.6 LE on MS Windows.
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*-------------------------------------------------------------------------
*/
title "HashVsHash cost model"
help
{@
This Sysquake LE script plots the results of the PostgreSQL or
Greenplum DB optimizer's cost models for comparison of two alternative
plans: hash join between T (outer) and U (inner), and hash join between
U (outer) and T (inner).
The intended users are database kernel developers and performance
analysts who are familiar with the concepts and source code of these
optimizers.
The "Parameters" area can be scrolled using a mouse wheel or the
hand cursor to bring all the controls into view. Unneeded controls
can be hidden using the "Show controls" checkboxes.
Kurt Harriman, greenplum inc
@}
version
{@
The cost model is thought to be mostly up-to-date with the current
versions of PostgreSQL and Greenplum DB as of
March 29, 2007
@}
variable relT
variable relU
variable joinsel
variable joinsize
// GUCs and #defines
variable costgucs
variable work_mem
variable NTUP_PER_BUCKET
variable blcksz
// blcksz(1) = BLCKSZ = current blocksize for cost model; adjustable via slider
// blcksz(2) same as (1) while modelVersion is a pg version, else stays frozen
// blcksz(3) false to keep (2) frozen; true to update (2) when (1) changes
// calculated
variable hashbuildT
variable hashbuildU
variable joincostTU
variable joincostUT
// keyboard input
variable scaleflag
// platform dependent
define MAXIMUM_ALIGNOF = 8
define sizeof_ptr = 8
// initial parameter values
define BLCKSZ_default = 8192 // for PostgreSQL versions
define rows_default = 1000
define rowSize_default = 100
// heap page capacity
define sizeof_PageHeaderData = 20 // storage/bufpage.h, up to pd_linp (first ItemId)
define MaxSpecialSpace = 32 // access/htup.h
define sizeof_ItemIdData = 4 // storage/itemid.h
// constants
define INT_MAX = 2147483647
// visible control group ids
variable _nondumpable visibleControls
define vc_explain_id = _auto
define vc_gucs_id = _auto
define vc_joinsel_id = _auto
define vc_quals_id = _auto
define vc_sizeadj_id = _auto
define vc_subplans_id = _auto
define vc_vers_id = _auto
// slider control ids
define blcksz_id = _auto
define explain_rows_id = _auto
define explain_rowSize_id = _auto
define explain_scanCost_id = _auto
define gucs_id = _auto
define joinrows_id = _auto
define ntup_per_bucket_id = _auto
define quals_id = _auto
define relT_id = _auto
define relU_id = _auto
define relT_rows_id = _auto
define relU_rows_id = _auto
define relT_rowSize_id = _auto
define relU_rowSize_id = _auto
define work_mem_id = _auto
// button and checkbox control ids
define modelVersion_id1 = _auto
define modelVersion_id2 = _auto
define modelVersion_id3 = _auto
define modelVersion_id4 = _auto
define modelVersion_idN = modelVersion_id4
define visibleControls_id1 = _auto
define visibleControls_id2 = _auto
define visibleControls_id3 = _auto
define visibleControls_id4 = _auto
define visibleControls_idN = visibleControls_id4
define optionCheckboxes_id1 = _auto
define optionCheckboxes_id2 = _auto
define optionCheckboxes_id3 = _auto
define optionCheckboxes_idN = optionCheckboxes_id3
// plot & line control ids
define outer_rows_id = _auto
define inner_rows_id = _auto
define joincost_id = _auto
define joincostTU_id = _auto
define joincostUT_id = _auto
define T_rows_id = _auto
define U_rows_id = _auto
define xytrace_id = _auto
// option checkboxes
variable _nondumpable optionCheckboxes
define oc_spilltweak = _auto
// cost model version ids
variable _nondumpable modelVersion // enumerator struct for the chosen version
variable _nondumpable modelVersionChooser
define ver_pg815 = _auto
define ver_pg820 = _auto // pg 8.2.0 uses costsize.c version 1.169
// This version introduced the MinimalTuple format
// for HJ workfiles etc., with shorter headers.
// See htup.h version 1.83 / MAIN:tgl:20060627213120
// Adds seq_page_cost GUC - see costsize.c 1.156 / MAIN:tgl:20060605024958
// pg 8.2.1 uses costsize.c version 1.169.2.1 /
// REL8_2_STABLE:tgl:20061215184235 - Skipping
// 8.2.1 here because HJ costing isn't affected
define ver_pg822 = _auto // pg 8.2.2 thru 8.2.4 all use costsize.c version 1.169.2.2
// REL8_2_STABLE:tgl:20070108160931
// Removes bias against HJ with inner rel larger than outer.
define ver_pg830 = _auto // pg 8.3.0 is still in development at this writing.
// At present this visualization reflects unreleased
// costsize.c version 1.179 dated 2007-03-27.
// Stored tuple header size reduced - see...
// htup.h ver 1.89 / MAIN:momjian:20070109220059
// htup.h ver 1.91 / MAIN:tgl:20070209033533
define ver_pgmin = ver_pg815
define ver_pgmax = ver_pg830
define ver_gp231 = _auto // gp 2.3.1 is based on pg 8.1.1
// uses costsize.c version 1.15.2.5 / Release-2_3_0-branch:kharriman:20070227024315
define ver_gp300 = _auto // gp 3.0 is based on pg 8.2.3
// BLCKSZ increased to 32K - see pg_config_manual.h ver 1.6 / MAIN:cmcdevitt:20070206024456
// uses costsize.c version 1.29
// Removes bias against HJ with inner rel larger than outer.
// Adds seq_page_cost GUC - see costsize.c 1.156 / MAIN:tgl:20060605024958
// Stored tuple header size reduced - see...
// pg htup.h ver 1.89 / MAIN:momjian:20070109220059 (will be in 8.3)
// pg htup.h ver 1.91 / MAIN:tgl:20070209033533 (will be in 8.3)
// gp htup.h ver 1.6 / MAIN:gsherry:20070301024612
// Num of buckets for hash join - new formula
// nodeHash.c ver 1.11 / MAIN:tkordas:20070313184638
// gp 3.0.0.1 skipped - no HJ costing changes
define ver_gp3002 = _auto // uses costsize.c version 1.29.6.5
// Adds gp_cost_hashjoin_chainwalk GUC, off by default - see
// costsize.c ver 1.29.6.4 / Release-3_0_0-branch:jzhang:20070816011436
// "#define NTUP_PER_BUCKET 10" replaced by gp_hashjoin_tuples_per_bucket guc with default value 5
// nodeHash.c ver 1.15.2.4 / Release-3_0_0-branch:tkordas:20070816172047
// nodeHash.c ver 1.15.2.5 / Release-3_0_0-branch:tkordas:20070816190818
// nodeHash.c ver 1.19 / MAIN:tkordas:20070816174001
// nodeHash.c ver 1.20 / MAIN:tkordas:20070816191722
// No other changes relevant to HJ cost.
define ver_gpmin = ver_gp231
define ver_gpmax = ver_gp3002
//define ver_default = ver_gpmax
define ver_default = ver_gp300
// bit bucket for unused result of multiple assignment
variable _nondumpable unused
//--------------------------------------------------------------------//
// Initialization of globals //
//--------------------------------------------------------------------//
init ( relT, relU, ...
joinsel, joinsize, ...
costgucs, work_mem, blcksz, NTUP_PER_BUCKET, ...
modelVersion, modelVersionChooser, optionCheckboxes, ...
visibleControls, ...
scaleflag, unused ) = init
function
{@
function ( relT, relU, ...
joinsel, joinsize, ...
costgucs, work_mem, blcksz, NTUP_PER_BUCKET, ...
modelVersion, modelVersionChooser, optionCheckboxes, ...
visibleControls, ...
scaleflag, unused ) = ...
init
subplots('HashVsHash\tControl Panel\nT HJ U\tHash Build (U inner)\nU HJ T\tHash Build (T inner)');
scaleflag = 0;
unused = [];
( modelVersion, modelVersionChooser, blcksz, NTUP_PER_BUCKET ) = ...
initVersions( ver_default, BLCKSZ_default );
optionCheckboxes = optionCheckboxes_init();
visibleControls = visibleControls_init();
costgucs = costgucs_init();
work_mem = 128 * 1024; // gp default is 128 MB
relT = rel_init( 'T', 'c', rows_default * 10, rowSize_default, costgucs.cpu_tuple_cost );
relU = rel_init( 'U', 'y', rows_default, rowSize_default / 10, costgucs.cpu_tuple_cost );
joinsize = joinsize_init( relT.initialRows, relU.initialRows, relT.initialRows );
joinsel = joinsel_init( joinsize.joinRows / joinsize.xprodRows, 1, 0 );
@}
//********************************************************************//
// //
// Structs //
// //
//********************************************************************//
//--------------------------------------------------------------------//
// PostgreSQL and Greenplum versions supported //
//--------------------------------------------------------------------//
function
{@
function ( modelVersion, modelVersionChooser, blcksz, NTUP_PER_BUCKET ) = ...
initVersions( ver_default, BLCKSZ_default )
versionPrototype = version_init([], 'versionPrototype', []);
versionPrototype.sizeof_HeapTupleHeaderData = MAXALIGN(27);
versionPrototype.sizeof_MinimalTupleData = versionPrototype.sizeof_HeapTupleHeaderData;
versionPrototype.NTUP_PER_BUCKET = 10;
versionEnumerators = {};
versionEnumerators = join(versionEnumerators, initPgVersionEnumerators( versionPrototype ));
versionEnumerators = join(versionEnumerators, initGpVersionEnumerators( versionPrototype ));
modelVersionChooser = enumChooser_init( versionEnumerators, ...
'Version of cost model', 4, ...
modelVersion_id1 : modelVersion_idN );
modelVersion_initial = enumChooser_id_to_enum( modelVersionChooser, ver_default );
blcksz = [ BLCKSZ_default, BLCKSZ_default, true ];
( modelVersion, blcksz, NTUP_PER_BUCKET ) = modelVersion_set( modelVersion_initial, blcksz );
function versions = ...
initPgVersionEnumerators(v)
v = version_init( v, 'pg 8.1.5', ver_pg815 );
versions{end+1} = v;
v = version_init( v, 'pg 8.2.0', ver_pg820 );
v.sizeof_MinimalTupleData = MAXALIGN(12); // MAIN:tgl:20060627213120
versions{end+1} = v;
v = version_init( v, 'pg 8.2.2', ver_pg822 );
versions{end+1} = v;
v = version_init( v, 'pg 8.3.0', ver_pg830 );
v.sizeof_HeapTupleHeaderData = MAXALIGN(23); // MAIN:tgl:20070209033533
versions{end+1} = v;
function versions = ...
initGpVersionEnumerators(v)
v = version_init( v, 'gp 2.3.1', ver_gp231 );
v.BLCKSZ = 8*1024;
versions{end+1} = v;
v = version_init( v, 'gp 3.0', ver_gp300 );
v.BLCKSZ = 32*1024; // MAIN:cmcdevitt:20070206024456 (pg_config_manual.h 1.6)
v.sizeof_MinimalTupleData = MAXALIGN(12); // MAIN:tgl:20060627213120
v.sizeof_HeapTupleHeaderData = MAXALIGN(23); // MAIN:gsherry:20070301024612
versions{end+1} = v;
v = version_init( v, 'gp 3.0.0.2', ver_gp3002 );
v.NTUP_PER_BUCKET = 5; // nodeHash.c ver 1.15.2.5 / Release-3_0_0-branch:tkordas:20070816190818
versions{end+1} = v;
function ( modelVersion, blcksz, NTUP_PER_BUCKET ) = ...
modelVersion_set( newVersion, blcksz )
if isfield( newVersion, 'BLCKSZ' )
// Reset BLCKSZ to version's default when switching to a gp version.
blcksz = [ newVersion.BLCKSZ, blcksz(2), false ];
else
// Don't reset BLCKSZ when switching between pg versions.
// When switching from gp to pg, restore latest pg BLCKSZ setting.
blcksz = [ blcksz(2), blcksz(2), true ];
end
NTUP_PER_BUCKET = newVersion.NTUP_PER_BUCKET;
modelVersion = newVersion;
function versionEnumerator = ...
version_init( copyFromVersion, enum_name, enum_id )
versionEnumerator = enumerator_init(enum_name, enum_id);
if ~isempty(copyFromVersion)
for fieldname = fieldnames(copyFromVersion)
if ~isfield(versionEnumerator, fieldname)
versionEnumerator.(fieldname) = copyFromVersion.(fieldname);
end
end
end
function blcksz = ...
blcksz_set( blcksz, newBLCKSZ )
// If pg version, save a copy of the BLCKSZ for modelVersion_set()
if blcksz(3)
blcksz(2) = newBLCKSZ;
end
blcksz(1) = newBLCKSZ;
@}
//--------------------------------------------------------------------//
// costgucs //
//--------------------------------------------------------------------//
function
{@
function costgucs = ...
costgucs_init
costgucs = struct();
costgucs.cpu_tuple_cost = 0.01;
costgucs.cpu_operator_cost = 0.0025;
costgucs.seq_page_cost = 1;
costgucs.hj_chainwalk_weight = 1;
@}
//--------------------------------------------------------------------//
// optionCheckboxes //
//--------------------------------------------------------------------//
function
{@
function optionCheckboxes = ...
optionCheckboxes_init()
enums = {};
enums{end+1} = enumerator_init( 'Batch 0 spill cost ' , oc_spilltweak );
// add items here
optionChooser = enumChooser_init( enums, ...
'Tweaks', 2, ...
optionCheckboxes_id1 : optionCheckboxes_idN );
optionCheckboxes = checkboxes_init( [], optionChooser );
@}
//--------------------------------------------------------------------//
// rel //
//--------------------------------------------------------------------//
// NB. User can set initialRows, initialRowSize and initialScanCost from
// an EXPLAIN report. From initialRows and initialScanCost we compute
// scanCostPerRow. Subsequently we compute the join's subplan cost as
// scanCost = rows * scanCostPerRow; this is added into the join cost.
// 'rows' is initialized from initialRows but may then be changed by
// the user to experiment with varying the amount of data entering the
// join. As 'rows' varies, we update the scanCost accordingly using the
// scanCostPerRow derived from initialRows and initialCost.
function
{@
function rel = ...
rel_init( name, rowLineStyle, rows, rowSize, scanCostPerRow )
rel = struct();
rel.name = name;
rel.rows = rows;
rel.rowSize = MAXALIGN( rowSize );
rel.rowLineStyle = rowLineStyle;
rel.initialRows = rows;
rel.initialRowSize = rowSize;
rel.initialScanCost = rows * scanCostPerRow;
rel.scanCost = rel.initialScanCost;
rel.scanCostPerRow = scanCostPerRow;
function ( rel, joinsel, joinsize ) = ...
rel_initialRows_drag( rel_old, whichrel, joinsel_old, joinsize_old, nb, x1 )
if isempty( nb ) || x1 <= 0
cancel;
end
rel = rel_old;
joinsel = joinsel_old;
joinsize = joinsize_old;
rows = ceil( x1 );
if whichrel == 1
joinsize = joinsize_init( rows, joinsize.rowsU, joinsize.joinRows );
else
joinsize = joinsize_init( joinsize.rowsT, rows, joinsize.joinRows );
end
rel.rows = rows;
rel.initialRows = rows;
rel = rel_initialScanCost_drag( rel, 0, rel.initialScanCost );
joinsel.selectivity = joinsize.joinRows / joinsize.xprodRows;
function rel = ...
rel_initialRowSize_drag( rel_old, nb, x1 )
if isempty( nb ) || x1 < 0
cancel;
end
rel = rel_old;
rel.initialRowSize = ceil( x1 );
rel.rowSize = MAXALIGN( rel.initialRowSize );
function rel = ...
rel_initialScanCost_drag( rel_old, nb, x1 )
if isempty( nb ) || x1 < 0
cancel;
end
rel = rel_old;
rel.initialScanCost = x1;
rel.scanCostPerRow = rel.initialScanCost / rel.initialRows;
rel.scanCost = rel.rows * rel.scanCostPerRow;
function rel = ...
rel_rows_drag( rel_old, nb, x1 )
if isempty( nb )
cancel;
end
rel = rel_old;
rel.rows = ceil( x1 );
rel.scanCost = rel.rows * rel.scanCostPerRow;
function ( msg, rel ) = ...
rel_rows_dragmsg( rel, nb, x1 )
rel = rel_rows_drag( rel, nb, x1 );
msg = rel_rows_mouseover( rel, rel.rows );
function ( msg, cursor ) = ...
rel_rows_mouseover( rel, x0 )
msg = sprintf( '%s = %d rows', rel.name, x0 );
cursor = true;
function rel = ...
rel_rowSize_drag( rel_old, nb, x1 )
if isempty(nb)
cancel;
end
rel = rel_old;
rel.rowSize = MAXALIGN(round(x1));
@}
//--------------------------------------------------------------------//
// joinsel //
//--------------------------------------------------------------------//
function
{@
function joinsel = ...
joinsel_init( selectivity, nquals_hash, nquals_postjoin )
joinsel = struct();
joinsel.selectivity = selectivity;
joinsel.nquals_hash = nquals_hash;
joinsel.nquals_postjoin = nquals_postjoin;
@}
//--------------------------------------------------------------------//
// joinsize //
//--------------------------------------------------------------------//
function
{@
function joinsize = ...
joinsize_init( rowsT, rowsU, joinRows )
joinsize = struct();
joinsize.rowsT = rowsT;
joinsize.rowsU = rowsU;
joinsize.xprodRows = rowsT * rowsU;
joinsize.joinRows = min( joinRows, joinsize.xprodRows );
@}
//--------------------------------------------------------------------//
// hashbuild //
//--------------------------------------------------------------------//
make hashbuildT = hashbuild_make( relT, work_mem, NTUP_PER_BUCKET, ...
joinsel, costgucs, blcksz, ...
modelVersion, optionCheckboxes )
make hashbuildU = hashbuild_make( relU, work_mem, NTUP_PER_BUCKET, ...
joinsel, costgucs, blcksz, ...
modelVersion, optionCheckboxes )
function
{@
function hashbuild = ...
hashbuild_make( rel, work_mem, NTUP_PER_BUCKET, ...
joinsel, costgucs, blcksz, ...
modelVersion, optionCheckboxes )
hashbuild = struct();
( hashbuild.nbuckets, hashbuild.nbatch, hashbuild.bytes ) = ...
ExecChooseHashTableSize( rel.rows, rel.rowSize, ...
work_mem, NTUP_PER_BUCKET, ...
modelVersion, optionCheckboxes );
( hashbuild.totalCost, hashbuild.ioCost, hashbuild.cpuCost ) = ...
cost_hashjoin( 0, 0, 0, 0, ...
rel.rows, rel.rowSize, rel.scanCost, ...
hashbuild.nbuckets, hashbuild.nbatch, ...
joinsel, costgucs, blcksz, ...
modelVersion, optionCheckboxes );
@}
//--------------------------------------------------------------------//
// joincostTU/UT //
//--------------------------------------------------------------------//
make joincostTU = ...
joincost_make( relT, relU, hashbuildU, ...
joinsel, costgucs, blcksz, modelVersion, optionCheckboxes )
make joincostUT = ...
joincost_make( relU, relT, hashbuildT, ...
joinsel, costgucs, blcksz, modelVersion, optionCheckboxes )
function
{@
function hjCost = ...
joincost_make( relOuter, relInner, hashbuildInner, ...
joinsel, costgucs, blcksz, modelVersion, optionCheckboxes )
( hjCost, hjIO, hjCPU ) = ...
cost_hashjoin( relOuter.rows, relOuter.rowSize, relOuter.scanCost, 0, ...
relInner.rows, relInner.rowSize, relInner.scanCost, ...
hashbuildInner.nbuckets, hashbuildInner.nbatch, ...
joinsel, costgucs, blcksz, modelVersion, optionCheckboxes );
@}
//********************************************************************//
// //
// User Interface //
// //
//********************************************************************//
//--------------------------------------------------------------------//
// Keyboard event handlers //
//--------------------------------------------------------------------//
keydown "s" scaleflag = scaleflag_keydown( scaleflag )
function
{@
function scaleflag = ...
scaleflag_keydown( scaleflag )
scaleflag = ~scaleflag;
@}
//--------------------------------------------------------------------//
// VisibleControls //
//--------------------------------------------------------------------//
function
{@
function visibleControls = ...
visibleControls_init()
enums = {};
enums{end+1} = enumerator_init( 'Version' , vc_vers_id );
enums{end+1} = enumerator_init( 'Size Adj' , vc_sizeadj_id );
enums{end+1} = enumerator_init( 'GUCs' , vc_gucs_id );
enums{end+1} = enumerator_init( 'Quals' , vc_quals_id );
enums{end+1} = enumerator_init( 'Selectivity' , vc_joinsel_id );
enums{end+1} = enumerator_init( 'Subplans' , vc_subplans_id );
enums{end+1} = enumerator_init( 'Explain' , vc_explain_id );
visibleControlsChooser = enumChooser_init( enums, ...
'Show controls', 4, ...
visibleControls_id1 : visibleControls_idN );
initiallyVisible = [ vc_vers_id ];
visibleControls = checkboxes_init( initiallyVisible, visibleControlsChooser );
@}
//--------------------------------------------------------------------//
// "Control Panel" //
//--------------------------------------------------------------------//
figure "Control Panel"
mouseover
_msg = params_mouseover()
mouseup visibleControls_id1
visibleControls = ...
checkboxes_mouseup( visibleControls, _id, _x1, _x0 )
mouseup visibleControls_id2
visibleControls = ...
checkboxes_mouseup( visibleControls, _id, _x1, _x0 )
mouseup visibleControls_id3
visibleControls = ...
checkboxes_mouseup( visibleControls, _id, _x1, _x0 )
mouseup modelVersion_id1
( modelVersion, blcksz, NTUP_PER_BUCKET ) = ...
vers_mouseup( modelVersionChooser, blcksz, _id, _x1 )
mouseup modelVersion_id2
( modelVersion, blcksz, NTUP_PER_BUCKET ) = ...
vers_mouseup( modelVersionChooser, blcksz, _id, _x1 )
mouseup modelVersion_id3
( modelVersion, blcksz, NTUP_PER_BUCKET ) = ...
vers_mouseup( modelVersionChooser, blcksz, _id, _x1 )
mouseup optionCheckboxes_id1
optionCheckboxes = ...
checkboxes_mouseup( optionCheckboxes, _id, _x1, _x0 )
mouseup optionCheckboxes_id2
optionCheckboxes = ...
checkboxes_mouseup( optionCheckboxes, _id, _x1, _x0 )
mouseup optionCheckboxes_id3
optionCheckboxes = ...
checkboxes_mouseup( optionCheckboxes, _id, _x1, _x0 )
mousedrag blcksz_id
blcksz = blcksz_drag( blcksz, _nb, _x1 )
mousedrag explain_rows_id
( relT, relU, joinsel, joinsize ) = ...
explain_rows_drag( relT, relU, joinsel, joinsize, _nb, _x1 )
mousedrag explain_rowSize_id
( relT, relU ) = ...
explain_rowSize_drag( relT, relU, _nb, _x1 )
mousedrag explain_scanCost_id
( relT, relU ) = ...
explain_scanCost_drag( relT, relU, _nb, _x1 )
mousedrag gucs_id
costgucs = ...
gucs_drag( costgucs, _nb, _x1 )
mousedrag joinrows_id
( joinsel, joinsize ) = ...
joinrows_drag( joinsel, joinsize, _nb, _x1 )
mousedrag quals_id
joinsel = ...
quals_drag( joinsel, _nb, _x1 )
mousedrag ntup_per_bucket_id
NTUP_PER_BUCKET = intSlider_drag(_nb, _x1)
mousedrag relT_rows_id
relT = ...
rel_rows_drag( relT, _nb, _x1 )
mousedrag relU_rows_id
relU = ...
rel_rows_drag( relU, _nb, _x1 )
mousedrag relT_rowSize_id
relT = ...
rel_rowSize_drag( relT, _nb, _x1 )
mousedrag relU_rowSize_id
relU = ...
rel_rowSize_drag( relU, _nb, _x1 )
mousedrag work_mem_id
work_mem = work_mem_drag(_nb, _x1)
draw
params_draw( relT, relU, hashbuildT, hashbuildU, joinsel, joinsize, ...
costgucs, work_mem, blcksz, NTUP_PER_BUCKET, ...
modelVersion, modelVersionChooser, optionCheckboxes, ...
joincostTU, joincostUT, ...
visibleControls )
function
{@
function ...
params_draw( relT, relU, hashbuildT, hashbuildU, joinsel, joinsize, ...
costgucs, work_mem, blcksz, NTUP_PER_BUCKET, ...
modelVersion, modelVersionChooser, optionCheckboxes, ...
joincostTU, joincostUT, ...
visibleControls )
title( sprintf( '%s Cost: T x U = %.0f U x T = %.0f', ...
modelVersion.enum_name, joincostTU, joincostUT ) );
firsttab = 'hj_chainwalk_weight (0.0000)____'; // make room for widest label
checkboxes_draw( visibleControls, firsttab );
for vc_enum_id = checkboxes_chosen( visibleControls )
switch vc_enum_id
case vc_vers_id
vers_draw( blcksz, modelVersion, modelVersionChooser, optionCheckboxes, firsttab );
case vc_explain_id
explain_draw( firsttab, relT, relU, joinsize, work_mem );
case vc_gucs_id
gucs_draw( costgucs, work_mem, NTUP_PER_BUCKET );
case vc_joinsel_id
joinsize_draw( joinsize );
case vc_quals_id
quals_draw( joinsel );
case vc_sizeadj_id
sizeadj_draw( relT, relU, hashbuildT, hashbuildU, joinsel );
case vc_subplans_id
subplans_draw( relT, relU );
end
end
function msg = ...
params_mouseover()
msg = 'Control panel area can be dragged up/down or scrolled with mousewheel.';
function blcksz = ...
blcksz_drag( blcksz, nb, x1 )
if isempty(nb)
cancel;
end
blcksz = blcksz_set( blcksz, 2^round(x1+12) );
function ...
explain_draw( firsttab, relT, relU, joinsize, work_mem )
text( '-------------------- EXPLAIN plan' );
settabs( [ firsttab, '\t000000000000000\t \t000000000000000' ] );
textfield( 'Joined rows out', '%.0f', joinsize.joinRows, '', joinrows_id );
textfield( 'Rows to join (T, U)\t', '%.0f%.0f', [ joinsize.rowsT, joinsize.rowsU ], '', explain_rows_id );
textfield( 'Subplan cost (T, U)\t', '%.0f%.0f%', [ relT.initialScanCost, relU.initialScanCost ], '', explain_scanCost_id );
textfield( 'Row size (T, U)\t', '%.0f%.0f', [ relT.initialRowSize, relU.initialRowSize ], '', explain_rowSize_id );
textfield( 'work_mem (KB)', '%.0f', work_mem, '', work_mem_id );
text( ' Please ignore extra trailing digits or junk in the fields above (Sysquake bug).' );
settabs( firsttab );
function ( relT, relU, joinsel, joinsize ) = ...
explain_rows_drag( relT, relU, joinsel, joinsize, nb, x1 )
if isempty( nb ) || x1 <= 0
cancel;
end
switch nb
case 1
( relT, joinsel, joinsize ) = rel_initialRows_drag( relT, 1, joinsel, joinsize, nb, x1 );
case 2
( relU, joinsel, joinsize ) = rel_initialRows_drag( relU, 2, joinsel, joinsize, nb, x1 );
end
function ( relT, relU ) = ...
explain_rowSize_drag( relT, relU, nb, x1 )
if isempty( nb ) || x1 <= 0
cancel;
end
switch nb
case 1
relT = rel_initialRowSize_drag( relT, nb, x1 );
case 2
relU = rel_initialRowSize_drag( relU, nb, x1 );
end
function ( relT, relU ) = ...
explain_scanCost_drag( relT, relU, nb, x1 )
if isempty( nb ) || x1 <= 0
cancel;
end
switch nb
case 1
relT = rel_initialScanCost_drag( relT, nb, x1 );
case 2
relU = rel_initialScanCost_drag( relU, nb, x1 );
end
function ...
gucs_draw( costgucs, work_mem, NTUP_PER_BUCKET )
text('-------------------- GUCs');
labels = sprintf( 'work_mem (%d KB)', work_mem );
slider( labels, work_mem, [32,4*1024*1024], 'L', '', work_mem_id );
labels = sprintf( 'cpu_tuple_cost (%f)\ncpu_operator_cost (%f)\nseq_page_cost (%f)\nhj_chainwalk_weight (%f)', ...
costgucs.cpu_tuple_cost, costgucs.cpu_operator_cost, ...
costgucs.seq_page_cost, costgucs.hj_chainwalk_weight );
slider( labels, ...
[costgucs.cpu_tuple_cost; costgucs.cpu_operator_cost; costgucs.seq_page_cost; costgucs.hj_chainwalk_weight], ...
[0,0.1; 0,0.1; 0,10; 1e-4,100], '---L', '', gucs_id );
labels = sprintf('ntup_per_bucket (%d)', NTUP_PER_BUCKET);
slider(labels, NTUP_PER_BUCKET, [1,20], '', '', ntup_per_bucket_id);
function costgucs = ...
gucs_drag( costgucs_old, nb, x1 )
if isempty(nb)
cancel;
end
costgucs = costgucs_old;
switch nb
case 1
costgucs.cpu_tuple_cost = x1;
case 2
costgucs.cpu_operator_cost = x1;
case 3
costgucs.seq_page_cost = x1;
case 4
costgucs.hj_chainwalk_weight = x1;
end
function ...
joinsize_draw( joinsize )
invsel = joinsize.xprodRows / joinsize.joinRows;
text( sprintf( '-------------------- Join selectivity = 1 / %.0f', invsel ) );
labels = sprintf( 'T rows to join (%d)\nU rows to join (%d)', joinsize.rowsT, joinsize.rowsU );
slider( labels, [ joinsize.rowsT; joinsize.rowsU ], [ 1,1e9 ], 'L', '', explain_rows_id );
labels = sprintf( 'Joined rows out (%.0f)', joinsize.joinRows );
slider( labels, joinsize.joinRows, [ 1,joinsize.xprodRows ], 'L', '', joinrows_id );
function ( joinsel, joinsize ) = ...
joinrows_drag( joinsel_old, joinsize_old, nb, x1 )
if isempty(nb) || x1 < 0
cancel;
end
joinsel = joinsel_old;
joinsize = joinsize_old;
joinsize.joinRows = min( ceil( x1 ), joinsize.xprodRows );
joinsel.selectivity = joinsize.joinRows / joinsize.xprodRows;
function ...
quals_draw( joinsel )
text( '-------------------- Join quals' );
labels = sprintf( 'Hash Cond (%d)\nJoin Filter (%d)', ...
joinsel.nquals_hash, joinsel.nquals_postjoin );
slider( labels, [ joinsel.nquals_hash; joinsel.nquals_postjoin ], [ 0,10 ], '', '', quals_id );
function joinsel = ...
quals_drag( joinsel_old, nb, x1 )
if isempty(nb)
cancel;
end
joinsel = joinsel_old;
switch nb
case 1
joinsel.nquals_hash = round( x1 );
case 2
joinsel.nquals_postjoin = round( x1 );
end
function ...
sizeadj_draw( relT, relU, hashbuildT, hashbuildU, joinsel )
bytesT = relT.rows * relT.rowSize;
bytesU = relU.rows * relU.rowSize;
joinResultRows = relT.rows * relU.rows * joinsel.selectivity;
text( sprintf( '-------------------- Size Adj\t%s; Out = %s', ...
fmtsizepair( 'T = %.0f %sB ', hashbuildT.bytes, ...
' U = %.0f %sB', hashbuildU.bytes ), ...
fmtsize( '%.0f %s rows', joinResultRows ) ) );
labels = sprintf( 'Rows from T (%d)', relT.rows );
slider( labels, relT.rows, [ 1,1e9 ], 'L', relT.rowLineStyle, relT_rows_id );
labels = sprintf( 'Rows from U (%d)', relU.rows );
slider( labels, relU.rows, [ 1,1e9 ], 'L', relU.rowLineStyle, relU_rows_id );
labels = sprintf( 'Width T (%d bytes/row)', relT.rowSize );
slider( labels, relT.rowSize, [ 1,1000 ], '', '', relT_rowSize_id );
labels = sprintf( 'Width U (%d bytes/row)', relU.rowSize );
slider( labels, relU.rowSize, [ 1,1000 ], '', '', relU_rowSize_id );
function ...
subplans_draw( relT, relU )
text( sprintf( '-------------------- Subplan cost per row ... %s = 1/%g %s = 1/%g', ...
relT.name, 1 / relT.scanCostPerRow, relU.name, 1 / relU.scanCostPerRow ) );
labels = sprintf( '%s total cost (%d)\n%s total cost (%d)', ...
relT.name, relT.initialScanCost, relU.name, relU.initialScanCost );
slider( labels, [ relT.initialScanCost; relU.initialScanCost ], [ 1,1e9 ], 'L', '', explain_scanCost_id );
labels = sprintf( '%s rows to join (%d)\n%s rows to join (%d)', ...
relT.name, relT.initialRows, relU.name, relU.initialRows );
slider( labels, [ relT.initialRows; relU.initialRows ], [ 1,1e9 ], 'L', '', explain_rows_id );
function ...
vers_draw( blcksz, modelVersion, modelVersionChooser, optionCheckboxes, firsttab )
text( '-------------------- Version' );
labels = sprintf( 'Block size (%d KB)', blcksz(1)/1024 );
slider( labels, log2( blcksz(1)/4096 ), [0,6], '', '', blcksz_id );
settabs([firsttab, '\t\b', modelVersionChooser.longestName, '_']);
radiobuttons_draw( modelVersionChooser, modelVersion.enum_id );
checkboxes_draw( optionCheckboxes, firsttab );
settabs( firsttab );
function ( modelVersion, blcksz, NTUP_PER_BUCKET ) = ...
vers_mouseup( modelVersionChooser, blcksz, id, x1 )
( modelVersion, blcksz, NTUP_PER_BUCKET ) = ...
modelVersion_set( radiobuttons_mouseup( modelVersionChooser, id, x1 ), blcksz );
function work_mem = ...
work_mem_drag( nb, x1 )
if isempty(nb) || x1 < 32
cancel;
end
work_mem = x1;
@}
//--------------------------------------------------------------------//
// "(T hj U) vs (U hj T)" plots //
//--------------------------------------------------------------------//
figure "HashVsHash"
mouseover
( _msg, _cursor ) = xy_mouseover( _id, _x0, _y0 )
mouseover joincostTU_id
_msg = joincost_mouseover( relT, relU, hashbuildT, hashbuildU, 1, _x0, _y0 )
mouseover joincostUT_id
_msg = joincost_mouseover( relT, relU, hashbuildT, hashbuildU, 0, _x0, _y0 )
draw hashVsHash_draw( relT, relU, hashbuildU, ...
joinsel, costgucs, scaleflag, ...
work_mem, NTUP_PER_BUCKET, blcksz, ...
modelVersion, optionCheckboxes, ...
joincostTU, joincostUT, ...
1 )
figure "HashVsHash (Ratio)"
mouseover
( _msg, _cursor ) = xy_mouseover( _id, _x0, _y0 )
draw hashVsHash_draw( relT, relU, hashbuildU, ...
joinsel, costgucs, scaleflag, ...
work_mem, NTUP_PER_BUCKET, blcksz, ...
modelVersion, optionCheckboxes, ...
joincostTU, joincostUT, ...
2 )
figure "HashVsHash (Difference)"
mouseover
( _msg, _cursor ) = xy_mouseover( _id, _x0, _y0 )
draw hashVsHash_draw( relT, relU, hashbuildU, ...
joinsel, costgucs, scaleflag, ...
work_mem, NTUP_PER_BUCKET, blcksz, ...
modelVersion, optionCheckboxes, ...
joincostTU, joincostUT, ...
3 )
function
{@
function ...
hashVsHash_draw( relT, relU, hashbuildU, ...
joinsel, costgucs, scaleflag, ...
work_mem, NTUP_PER_BUCKET, blcksz, ...
modelVersion, optionCheckboxes, ...
joincostTU, joincostUT, ...
whichPlot )
sc = scale;
if isempty(sc)
sc = [1,1e9];
end
relT_rowSamples = 10.^[log10(max(1,sc(1))):.1:log10(max(1,sc(2)))];
for relT_rows = relT_rowSamples
tuCost(end+1) = ...
cost_hashjoin( relT_rows, relT.rowSize, relT_rows * relT.scanCostPerRow, 0, ...
relU.rows, relU.rowSize, relU.scanCost, ...
hashbuildU.nbuckets, hashbuildU.nbatch, ...
joinsel, costgucs, blcksz, ...
modelVersion, optionCheckboxes );
( hashT_nbuckets, hashT_nbatch, hashT_bytes(end+1) ) = ...
ExecChooseHashTableSize( relT_rows, relT.rowSize, ...
work_mem, NTUP_PER_BUCKET, ...
modelVersion, optionCheckboxes );
utCost(end+1) = ...
cost_hashjoin( relU.rows, relU.rowSize, relU.scanCost, 0, ...
relT_rows, relT.rowSize, relT_rows * relT.scanCostPerRow, ...
hashT_nbuckets, hashT_nbatch, ...
joinsel, costgucs, blcksz, ...
modelVersion, optionCheckboxes );
end
switch whichPlot
case 1
whoami = '';
hashVsHash_scale( sc, scaleflag, 'loglog', [1,1e12], [ 1, tuCost, utCost ] );
plot( relT_rowSamples, tuCost, 'r', joincostTU_id );
plot( relT_rowSamples, utCost, 'b', joincostUT_id );
legnd = 'Total cost (T x U)\nTotal cost (U x T)\nT bigger & inner\nU bigger & inner';
style = 'rbh(a0a0ff)xh(ffa0a0)+';
// plot the cost at subplan T's specified number of rows
plot( relT.rows, joincostTU, 'ro', joincostTU_id );
plot( relT.rows, joincostUT, 'b[', joincostUT_id );
// was larger rel chosen as inner? (counting hash table tuple overhead)
sizediff = hashT_bytes - hashbuildU.bytes;
costdiff = tuCost - utCost;
biginnerT = (sizediff >= 1000) & (costdiff >= 10);
biginnerU = (sizediff <= -1000) & (costdiff <= -10);
if nnz(biginnerT)
plot( relT_rowSamples(biginnerT), utCost(biginnerT), 'h(a0a0ff)x' );
end
if nnz(biginnerU)
plot( relT_rowSamples(biginnerU), tuCost(biginnerU), 'h(ffa0a0)+' );
end
case 2
whoami = '(Ratio)';
costratio = tuCost ./ utCost;
sizeratio = hashT_bytes / hashbuildU.bytes;
hashVsHash_scale( sc, scaleflag, 'loglog', [0.5,2.0], [ costratio ] );
line( [0,1], 1.0, 'h(c0c0c0)' );
plot( relT_rowSamples, costratio, 'g', xytrace_id );
plot( relT_rowSamples, sizeratio, 'm', xytrace_id );
// plot the cost ratio at subplan T's specified number of rows
plot( relT.rows, joincostTU / joincostUT, 'go', xytrace_id );
legnd = 'Cost(T x U) / Cost(U x T)\nT bytes / U bytes';
style = 'gm';
case 3
whoami = '(Difference)';
diff = tuCost - utCost;
hashVsHash_scale( sc, scaleflag, 'loglin', [-1000,1000], diff );
line( [0,1], 0.0, 'h(c0c0c0)' );
plot( relT_rowSamples, diff, 'm', xytrace_id );
// plot the difference at subplan T's specified number of rows
plot( relT.rows, joincostTU - joincostUT, 'm[', xytrace_id );
legnd = 'Cost(T x U) - Cost(U x T)';
style = 'm';
end
label( [' ', relT.name, ' Rows'] );
legend( legnd, style );
plotoption('tllegend');
title( sprintf( 'Hash Join Cost %s ... U = %.0f rows', ...
whoami, relU.rows ) );
function ...
hashVsHash_scale( sc, scaleflag, option, default, vec )
scale( option );
if scaleflag
sc(3) = min( vec );
sc(4) = max( vec );
else
sc(3:4) = default;
end
scale('lock', sc);
function msg = ...
joincost_mouseover( relT, relU, hashbuildT, hashbuildU, isTxU, x0, y0 )
xxx = isTxU ? 'T x U' : 'U x T';
hashRowSizeT = hashbuildT.bytes / relT.rows;
msg = sprintf( 'Cost(%s) = %.0f %s T = %.0f rows U = %.0f rows', ...
xxx, y0, ...
fmtsizepair( 'T = %.0f %sB ', x0 * hashRowSizeT, ...
' U = %.0f %sB', hashbuildU.bytes ), ...
x0, relU.rows );
@}
//--------------------------------------------------------------------//
// "Hash Join (x outer, y inner)" plots //
//--------------------------------------------------------------------//
figure "T HJ U"
mouseover
( _msg, _cursor ) = xy_mouseover( _id, _x0, _y0 )
mouseover joincost_id
_msg = joincost_mouseover( relT, relU, hashbuildT, hashbuildU, 1, _x0, _y0 )
draw joinCost_draw( relT, relU, hashbuildU, ...
joinsel, costgucs, blcksz, ...
modelVersion, optionCheckboxes, ...
joincostTU )
figure "U HJ T"
mouseover
( _msg, _cursor ) = xy_mouseover( _id, _x0, _y0 )
mouseover joincost_id
_msg = joincost_mouseover( relT, relU, hashbuildT, hashbuildU, 0, _x0, _y0 )
draw joinCost_draw( relU, relT, hashbuildT, ...
joinsel, costgucs, blcksz, ...
modelVersion, optionCheckboxes, ...
joincostUT )
function
{@
function ...
joinCost_draw( relOuter, relInner, hashbuildInner, ...
joinsel, costgucs, blcksz, ...
modelVersion, optionCheckboxes, ...
joincostOuterInner )
scale loglog;
scale('lock', [1,1e9,1,1e12]);
sc = scale;
outerRowSamples = 10.^[log10(max(1,sc(1))):.1:log10(max(1,sc(2)))];
for outerRows = outerRowSamples
(hjCost(end+1), hjIO(end+1), hjCPU(end+1)) = ...
cost_hashjoin( outerRows, relOuter.rowSize, outerRows * relOuter.scanCostPerRow, 0, ...
relInner.rows, relInner.rowSize, relInner.scanCost, ...
hashbuildInner.nbuckets, hashbuildInner.nbatch, ...
joinsel, costgucs, blcksz, ...
modelVersion, optionCheckboxes );
end
title( sprintf( 'Hash Join (%s outer, %s inner) ... %s = %.0f rows', ...
relOuter.name, relInner.name, relInner.name, relInner.rows ) );
line( [0,1], hashbuildInner.totalCost, 'm', xytrace_id );
plot( outerRowSamples, hjIO, 'g', xytrace_id );
plot( outerRowSamples, hjCPU, 'r', xytrace_id );
plot( outerRowSamples, hjCost, 'b', joincost_id );
// join cost at exact number of rows specified
plot( relOuter.rows, joincostOuterInner, [ relOuter.rowLineStyle, '[' ], joincost_id );
label( [relOuter.name, ' Rows (Outer)'] );
legend( sprintf( 'Total cost (%s x %s)\nI/O cost\nCPU cost\n%s build cost\n%s rows', ...
relOuter.name, relInner.name, relInner.name, relOuter.name ), ...
['bgrm', relOuter.rowLineStyle] );
plotoption('tllegend');
@}
//--------------------------------------------------------------------//
// "Hash Build" plots //
//--------------------------------------------------------------------//
figure "Hash Build (T inner)"
mouseover
( _msg, _cursor ) = xy_mouseover( _id, _x0, _y0 )
mousedrag inner_rows_id
( _msg, relT ) = rel_rows_dragmsg( relT, _nb, _x1 )
mouseover inner_rows_id
( _msg, _cursor ) = rel_rows_mouseover( relT, _x0 )
draw hashTable_draw( relT, joinsel, costgucs, ...
work_mem, blcksz, NTUP_PER_BUCKET, ...
modelVersion, optionCheckboxes )
figure "Hash Build (U inner)"
mouseover
( _msg, _cursor ) = xy_mouseover( _id, _x0, _y0 )
mousedrag inner_rows_id
( _msg, relU ) = rel_rows_dragmsg( relU, _nb, _x1 )
mouseover inner_rows_id
( _msg, _cursor ) = rel_rows_mouseover( relU, _x0 )
draw hashTable_draw( relU, joinsel, costgucs, ...
work_mem, blcksz, NTUP_PER_BUCKET, ...
modelVersion, optionCheckboxes )
function
{@
function ...
hashTable_draw( relInner, joinsel, costgucs, ...
work_mem, blcksz, NTUP_PER_BUCKET, ...
modelVersion, optionCheckboxes )
scale loglog;
scale('lock', [1,1e8,0.9,1e8]);
sc = scale;
rowSamples = 10.^[log10(max(1,sc(1))):.1:log10(max(1,sc(2)))];
// make sure the plot includes a sample at the exact number of rows specified
rowSamples = sort( [ rowSamples, relInner.rows ] );
for rows = rowSamples
(nbuckets(end+1), nbatch(end+1)) = ExecChooseHashTableSize( rows, relInner.rowSize, ...
work_mem, NTUP_PER_BUCKET, ...
modelVersion, optionCheckboxes );
hashpages(end+1) = (nbatch(end) <= 1) ? 0 : page_size(rows, relInner.rowSize, blcksz(1), modelVersion);
(hjStartupCost(end+1), hjStartupIO(end+1), hjStartupCPU(end+1)) = ...
cost_hashjoin( 0, 0, 0, 0, ...
rows, relInner.rowSize, rows * relInner.scanCostPerRow, ...
nbuckets(end), nbatch(end), ...
joinsel, costgucs, blcksz, ...
modelVersion, optionCheckboxes );
end
plotoption('tllegend');
plot(rowSamples, nbatch, 'h(ff8000)', xytrace_id);
plot(rowSamples, nbuckets, 'h(a0e060)', xytrace_id);
plot(rowSamples, hashpages, 'g', xytrace_id);
plot(rowSamples, hjStartupCost, 'm', xytrace_id);
line( [1,0], relInner.rows, relInner.rowLineStyle, inner_rows_id );
// mark the build cost at exact number of rows specified
i = find( rowSamples == relInner.rows, 1 );
plot( rowSamples(i), hjStartupCost(i), 'm[', xytrace_id );
label( [relInner.name, ' Rows (Inner)'] );
legnd = { 'Build cost\nSpill pages\nBatches\nBuckets', 'mgh(ff8000)h(a0e060)' };
legend( [legnd{1}, '\n', relInner.name, ' rows'], [legnd{2}, relInner.rowLineStyle] );
@}
//********************************************************************//
// //
// User Interface Utilities //
// //
//********************************************************************//
function
{@
function arr = ...
list2numFix( list )
// Sysquake 3.6 bug: Can't call list2num on an empty list; it returns garbage
arr = isempty( list ) ? [] : list2num( list );
@}
//--------------------------------------------------------------------//
// common UI functions //
//--------------------------------------------------------------------//
function
{@
function v = ...
floatSlider_drag(nb, x1)
if isempty(nb)
cancel;
end
v = x1;
function v = ...
intSlider_drag(nb, x1)
if isempty(nb)
cancel;
end
v = round(x1);
function (msg,cursor) = ...
xy_mouseover(id, x0, y0)
if isempty(id)
cancel;
end
msg = sprintf('x=%g y=%g', x0, y0);
cursor = false;
function s = ...
fmtsize( fmt, i )
j = abs( i );
if j >= 1e13
scl = 1e-12;
u = 'T';
elseif j >= 1e10
scl = 1e-9;
u = 'G';
elseif j >= 1e7
scl = 1e-6;
u = 'M';
elseif j >= 1e4
scl = 1e-3;
u = 'K';
else
scl = 1;
u = '';
end
// fmt should be something like '%.0f %s' or '%.3g%s'
s = sprintf( fmt, i * scl, u );
function s = ...
fmtsizepair( fmta, a, fmtb, b )
diff = a - b;
arrow = (diff > 1000) ? '>' : (diff < -1000) ? '<' : ' ';
s = [ fmtsize( fmta, a ), arrow, fmtsize( fmtb, b ) ];
@}
//--------------------------------------------------------------------//
// enumerator //
//--------------------------------------------------------------------//
function
{@
function enumerator = ...
enumerator_init( enum_name, enum_id )
enumerator = struct();
enumerator.enum_name = enum_name;
enumerator.enum_id = enum_id;
function enum_name = ...
enumerator_name( enumerator )
enum_name = enumerator.enum_name;
function enum_id = ...
enumerator_id( enumerator )
enum_id = enumerator.enum_id;
function enum_ids = ...
enumerator_ids( enumerators )
enum_ids = list2numFix( map( @enumerator_id, enumerators ) );
function enum_ids = ...
enumerator_ids_upd( enum_ids, remove_enum_ids, insert_enum_ids )
whichToDrop = ismember( enum_ids, [ remove_enum_ids, insert_enum_ids ] );
enum_ids = [ insert_enum_ids, enum_ids( ~whichToDrop ) ];
@}
//--------------------------------------------------------------------//
// enumChooser //
//--------------------------------------------------------------------//
function
{@
function ix = ...
enumChooser_name_to_ix( chooser, enum_name )
ix = find( list2numFix( map( @eq, chooser.enum_names, {enum_name} ) ), 1 ) || 0;
function ix = ...
enumChooser_id_to_ix( chooser, enum_id )
ix = find(chooser.enum_ids == enum_id, 1) || 0;
function enumerator = ...
enumChooser_name_to_enum( chooser, enum_name )
ix = enumChooser_name_to_ix( chooser, enum_name );
enumerator = ix ? chooser.enumerators{ix} : [];
function enumerator = ...
enumChooser_id_to_enum( chooser, enum_id )
ix = enumChooser_id_to_ix( chooser, enum_id );
enumerator = ix ? chooser.enumerators{ix} : struct();
function enumerators = ...
enumChooser_ids_to_enums( chooser, enum_ids )
enumerators = isempty( enum_ids ) ? {} : map( @enumChooser_id_to_enum, {chooser}, num2list( enum_ids ) );
function chooser = ...
enumChooser_init( enumerators, label, buttons_per_row, control_ids )
enum_names = map( @enumerator_name, enumerators );
enum_ids = enumerator_ids( enumerators );
n = length(enumerators);
assert(n <= buttons_per_row * length(control_ids));
bargs = {};
longestName = '';
for i = 1 : buttons_per_row : n
m = min(i + buttons_per_row - 1, n);
iba = length(bargs) + 1;
barg = struct();
barg.label = (i == 1) ? label : '';
for ix = i : m
enum_name = enum_names{ix};
barg.label = [barg.label, '\t', enum_name];
if length(longestName) < length(enum_name)
longestName = enum_name;
end
end
barg.control_id = control_ids(iba);
barg.ix0 = i - 1;
barg.ixM = m;
bargs{iba} = barg;
end
chooser = struct();
chooser.enumerators = enumerators;
chooser.enum_names = enum_names;
chooser.enum_ids = enum_ids;
chooser.buttonargs = bargs;
chooser.control_ids = control_ids(1:length(bargs));
chooser.longestName = longestName;
function buttonarg = ...
enumChooser_cid_to_barg( chooser, control_id )
iba = find( chooser.control_ids == control_id, 1 );
buttonarg = chooser.buttonargs{iba};
function ...
radiobuttons_draw( enumChooser, chosen_enum_id )
ix = enumChooser_id_to_ix( enumChooser, chosen_enum_id );
for buttonarg = enumChooser.buttonargs
button(buttonarg.label, ix - buttonarg.ix0, 'radiobutton', '', buttonarg.control_id);
end
function ( chosen_enumerator, chosen_enum_id ) = ...
radiobuttons_mouseup( enumChooser, id, x1 )
buttonarg = enumChooser_cid_to_barg(enumChooser, id);
chosen_enumerator = enumChooser.enumerators{x1 + buttonarg.ix0};
chosen_enum_id = chosen_enumerator.enum_id;
@}
//--------------------------------------------------------------------//
// checkboxes //
//--------------------------------------------------------------------//
function
{@
function checkboxes = ...
checkboxes_init( chosen_enum_ids, enumChooser )
checkboxes = struct();
checkboxes.chosen_enum_ids = chosen_enum_ids;
checkboxes.chooser = enumChooser;
checkboxes.checkmarks = checkboxes_checkmarks( chosen_enum_ids, enumChooser );
function chosen_enum_ids = ...
checkboxes_chosen( checkboxes )
chosen_enum_ids = checkboxes.chosen_enum_ids;
function ismem = ...
checkboxes_test( checkboxes, enum_ids )
ismem = ismember( enum_ids, checkboxes.chosen_enum_ids );
function checkboxes = ...
checkboxes_upd( checkboxes, remove_enum_ids, insert_enum_ids )
checkboxes.chosen_enum_ids = enumerator_ids_upd( checkboxes.chosen_enum_ids, remove_enum_ids, insert_enum_ids );
checkboxes.checkmarks = checkboxes_checkmarks( checkboxes.chosen_enum_ids, checkboxes.chooser );
function ...
checkboxes_draw( checkboxes, firsttab )
settabs( [ firsttab, '\t\b', checkboxes.chooser.longestName, '__' ] );
checkmarks = checkboxes.checkmarks;
bargs = checkboxes.chooser.buttonargs;
for iba = 1 : length( checkmarks )
barg = bargs{iba};
button( barg.label, checkmarks( iba ), 'checkmark', '', barg.control_id );
end
settabs( firsttab );
function checkboxes = ...
checkboxes_mouseup( checkboxes, id, x1, x0 )
// NB: For checkboxes we use mouseup instead of mousedrag, avoiding
// a Sysquake 3.6 bug in which the mousedrag handler's input and
// output variables of list or struct type become corrupted when
// the user drags the mouse (not just clicks) within a checkbox.
chooser = checkboxes.chooser;
insert_enum_ids = [];
remove_enum_ids = [];
buttonarg = enumChooser_cid_to_barg( chooser, id );
for iChanged = find(bitget(bitxor(x1, x0), [1:buttonarg.ixM]));
ix = iChanged + buttonarg.ix0;
id = chooser.enum_ids( ix );
if bitget(x1, iChanged)
insert_enum_ids(end+1) = id;
else
remove_enum_ids(end+1) = id;
end
end
checkboxes = checkboxes_upd( checkboxes, remove_enum_ids, insert_enum_ids );
function checkmarks = ...
checkboxes_checkmarks( chosen_enum_ids, enumChooser )
checkmarks = [];
for buttonarg = enumChooser.buttonargs
cm = 0;
for i = find(ismember(enumChooser.enum_ids(buttonarg.ix0+1 : buttonarg.ixM), chosen_enum_ids))
cm = bitset(cm, i);
end
checkmarks(end+1) = cm;
end
@}
//********************************************************************//
// //
// Cost model //
// //
//********************************************************************//
//--------------------------------------------------------------------//
// common cost model functions //
//--------------------------------------------------------------------//
// Last updated 24 March 2007.
function
{@
function per_tuple = ...
cost_qual_eval( nquals, cpu_operator_cost )
// costsize.c
per_tuple = nquals * cpu_operator_cost;
function nbytes_padded = ...
MAXALIGN( nbytes )
// c.h
nbytes_padded = bitand( nbytes+MAXIMUM_ALIGNOF-1, -MAXIMUM_ALIGNOF );
function nbytes = ...
relation_byte_size( tuples, width, modelVersion )
// costsize.c
// NB: width and sizeof_HeapTupleHeaderData have already been MAXALIGNed
nbytes = tuples * (width + modelVersion.sizeof_HeapTupleHeaderData);
function npages = ...
page_size( tuples, width, blcksz, modelVersion )
// costsize.c
npages = ceil( relation_byte_size( tuples, width, modelVersion ) / blcksz(1) );
@}
//--------------------------------------------------------------------//
// Hash Join cost //
//--------------------------------------------------------------------//
// Last updated 2007-08-28.
function
{@
function ( nbuckets, nbatch, inner_rel_bytes ) = ...
ExecChooseHashTableSize( ntuples, tupwidth, work_mem, NTUP_PER_BUCKET, ...
modelVersion, optionCheckboxes )
sizeof_HashJoinTupleData = MAXALIGN(sizeof_ptr + 4);
tupsize = sizeof_HashJoinTupleData + ...
modelVersion.sizeof_MinimalTupleData + ...
tupwidth;
inner_rel_bytes = ntuples * tupsize;
hash_table_bytes = work_mem * 1024;
if inner_rel_bytes > hash_table_bytes
nbuckets = min( floor(hash_table_bytes / tupsize / NTUP_PER_BUCKET), INT_MAX );
dbatch = min( ceil(inner_rel_bytes / hash_table_bytes), INT_MAX * 0.5 );
nbatch = 2;
while nbatch < dbatch
nbatch = nbatch * 2;
end
else
switch modelVersion.enum_id
case ver_gp300 : ver_gpmax
// nodeHash.c ver 1.11 / MAIN:tkordas:20070313184638
dbuckets_lower = ntuples / NTUP_PER_BUCKET;
dbuckets_upper = hash_table_bytes / (tupsize * NTUP_PER_BUCKET);
if dbuckets_upper > dbuckets_lower
dbuckets = (dbuckets_lower + dbuckets_upper)/2.0;
else
dbuckets = dbuckets_lower;
end
nbuckets = ceil(dbuckets);
otherwise
nbuckets = ceil(ntuples / NTUP_PER_BUCKET);
end
nbatch = 1;
end
hprimes = [ 1033, 2063, 4111, 8219, 16417, 32779, 65539, 131111, ...
262151, 524341, 1048589, 2097211, 4194329, 8388619, 16777289, 33554473, ...
67108913, 134217773, 268435463, 536870951, 1073741831 ];
nbuckets = hprimes( find( hprimes >= nbuckets, 1 ) ) || nbuckets;
function (total_cost, io_cost, cpu_cost) = ...
cost_hashjoin( outer_path_rows, outerRowSize, outer_path_total_cost, outer_path_startup_cost, ...
inner_path_rows, innerRowSize, inner_path_total_cost, ...
numbuckets, numbatches, ...
joinsel, costgucs, blcksz, ...
modelVersion, optionCheckboxes )
num_hashclauses = joinsel.nquals_hash;
cpu_operator_cost = costgucs.cpu_operator_cost;
cpu_tuple_cost = costgucs.cpu_tuple_cost;
seq_page_cost = costgucs.seq_page_cost;
// cost and selectivity of the hashquals and other restriction clauses
hash_qual_cost = cost_qual_eval( joinsel.nquals_hash, cpu_operator_cost );
qp_qual_cost = cost_qual_eval( joinsel.nquals_postjoin, cpu_operator_cost );
hash_selec = joinsel.selectivity;
// approx # tuples passing the hash quals
hashjointuples = hash_selec * outer_path_rows * inner_path_rows;
// cost of source data
startup_cost(end+1) = outer_path_startup_cost;
run_cost(end+1) = outer_path_total_cost - outer_path_startup_cost;
startup_cost(end+1) = inner_path_total_cost;
// cost of computing hash function
switch modelVersion.enum_id
case [ver_pg822 : ver_pgmax, ver_gp300 : ver_gpmax]
// pg costsize.c ver 1.169.2.1 / REL8_2_STABLE:tgl:20070108160931
// pg costsize.c ver 1.173 / MAIN:tgl:20070108160922
// gp costsize.c ver 1.29 / MAIN:cmcdevitt:20070316224417
startup_cost(end+1) = (cpu_operator_cost * num_hashclauses + cpu_tuple_cost) * inner_path_rows;
otherwise
startup_cost(end+1) = cpu_operator_cost * num_hashclauses * inner_path_rows;
end
run_cost(end+1) = cpu_operator_cost * num_hashclauses * outer_path_rows;
// hash table size that executor would use for inner relation
virtualbuckets = numbuckets * numbatches;
// bucketsize fraction for inner relation
// NB: In the code, this is usually determined from column statistics.
// Since we don't have that info, assume the best. Consequently our
// join cost estimate will usually be less than the real one from
// EXPLAIN.
innerbucketsize = 1 / virtualbuckets;
// I/O cost when hash table spills to disk
io_cost = 0;
if numbatches > 1
outerpages = page_size(outer_path_rows, outerRowSize, blcksz, modelVersion);
innerpages = page_size(inner_path_rows, innerRowSize, blcksz, modelVersion);
// experimental: exclude first batch from I/O cost
if checkboxes_test( optionCheckboxes, oc_spilltweak )
spillfrac = ( numbatches - 1 ) / numbatches;
outer_spill_rows = ceil( outer_path_rows * spillfrac );
inner_spill_rows = ceil( inner_path_rows * spillfrac );
outerpages = page_size(outer_spill_rows, outerRowSize, blcksz, modelVersion);
innerpages = page_size(inner_spill_rows, innerRowSize, blcksz, modelVersion);
end
startup_cost(end+1) = seq_page_cost * innerpages;
run_cost(end+1) = seq_page_cost * (innerpages + 2 * outerpages);
io_cost = startup_cost(end) + run_cost(end);
end
// cpu cost for hash lookups
joininfactor = 1;
chainwalk_cost = 0;
switch modelVersion.enum_id
case [ver_pgmin : ver_pg822 - 1]
// searching within hash chains
chainwalk_cost = hash_qual_cost * ...
outer_path_rows * ceil(inner_path_rows * innerbucketsize) * ...
joininfactor;
case ver_pg822 : ver_pgmax
// searching within hash chains
// pg costsize.c ver 1.169.2.1 / REL8_2_STABLE:tgl:20070108160931
// pg costsize.c ver 1.173 / MAIN:tgl:20070108160922
chainwalk_cost = hash_qual_cost * ...
outer_path_rows * ceil(inner_path_rows * innerbucketsize) * ...
joininfactor * 0.5;
case [ver_gp300 : ver_gp3002 - 1]
// evaluating hash join quals
// gp costsize.c ver 1.28 / MAIN:kharriman:20070227025348
// gp costsize.c ver 1.15.2.4 / Release-2_3_0-branch:kharriman:20070227023643
run_cost(end+1) = hash_qual_cost * hashjointuples;
// searching within hash chains
// gp costsize.c ver 1.28 / MAIN:kharriman:20070227025348
// gp costsize.c ver 1.15.2.5 / Release-2_3_0-branch:kharriman:20070227024315
chainwalk_cost = 0.05 * cpu_operator_cost * ...
outer_path_rows * inner_path_rows * innerbucketsize * joininfactor;
case ver_gp3002 : ver_gpmax
// evaluating hash join quals
// gp costsize.c ver 1.28 / MAIN:kharriman:20070227025348
// gp costsize.c ver 1.15.2.4 / Release-2_3_0-branch:kharriman:20070227023643
run_cost(end+1) = hash_qual_cost * hashjointuples;
// searching within hash chains
// gp costsize.c ver 1.29.6.4 / Release-3_0_0-branch:jzhang:20070816011436
gp_cost_hashjoin_chainwalk = false;
if gp_cost_hashjoin_chainwalk
chainwalk_cost = 0.05 * cpu_operator_cost * ...
outer_path_rows * inner_path_rows * innerbucketsize * joininfactor;
end
end
run_cost(end+1) = chainwalk_cost * costgucs.hj_chainwalk_weight;
// cpu cost for post-join quals
joininfactor = 1;
switch modelVersion.enum_id
case ver_pgmin : ver_pgmax
cpu_per_tuple = cpu_tuple_cost + qp_qual_cost;
run_cost(end+1) = cpu_per_tuple * hashjointuples * joininfactor;
case ver_gpmin : ver_gpmax
// gp costsize.c ver 1.15.2.1 / Release-2_3_0-branch:kharriman:20061105010029
// gp costsize.c ver 1.16 / MAIN:kharriman:20061105005913
resultRows_punt = hashjointuples; // assume all rows pass
run_cost(end+1) = qp_qual_cost * hashjointuples;
run_cost(end+1) = cpu_tuple_cost * resultRows_punt;
end
// bias against putting larger relation on inside
bias = 1;
innerbytes = inner_path_rows * innerRowSize;
outerbytes = outer_path_rows * outerRowSize;
if innerbytes > outerbytes && outerbytes > 0
switch modelVersion.enum_id
case ver_pgmin : ver_pg822 - 1
bias = sqrt(innerbytes/outerbytes);
case ver_gpmin : ver_gp300 - 1
// gp costsize.c ver 1.15.2.1 / Release-2_3_0-branch:kharriman:20061105010029
// gp costsize.c ver 1.16 / MAIN:kharriman:20061105005913
bias = 1 + 0.1*log(innerbytes/outerbytes);
case [ver_pg822 : ver_pgmax, ver_gp300 : ver_gpmax]
// pg costsize.c ver 1.169.2.1 / REL8_2_STABLE:tgl:20070108160931
// pg costsize.c ver 1.173 / MAIN:tgl:20070108160922
// gp costsize.c ver 1.29 / MAIN:cmcdevitt:20070316224417
// deleted the bias
end
end
startup_sum = sum(startup_cost);
run_sum = sum(run_cost);
/*
if outer_path_startup_cost == 1/256
dumpvar( { '<<<', [ outer_path_rows, inner_path_rows ], hashjointuples, ...
virtualbuckets, chainwalk_cost, ...
'startup_cost=', startup_sum, dumpvar(startup_cost), ...
'run_cost=', run_sum, dumpvar(run_cost), '>>>' } );
end
*/
total_cost = startup_sum + bias*run_sum;
cpu_cost = startup_sum + run_sum - io_cost;
@}