blob: cf708526ad33f46115ea363cb4b8b7bc9b30f2b5 [file] [log] [blame]
-- This file contains the Python helper functions that the other SQL files use
-- in their correctness tests, such as checking the CPU usage, checking the
-- CPUSET bitmap, and checking whether a cgroup file exists.
--
-- In Cgroup v1 (Alpha), we will check the directory of
-- /sys/fs/cgroup/cpu/gpdb
-- /sys/fs/cgroup/cpuacct/gpdb
-- /sys/fs/cgroup/cpuset/gpdb
--
-- In Cgroup v2 (Beta), we will check the directory of
-- /sys/fs/cgroup/gpdb/*
--
-- Different tests need different auxiliary tool files; add the matching one
-- to the schedule file of the test being run.
-- start_ignore
-- The helper functions defined in this file are written in plpython3u.
CREATE LANGUAGE plpython3u;
-- end_ignore
-- Enable resource groups (group-v2) and restart the cluster.
-- Prerequisites:
-- 1. '/sys/fs/cgroup/gpdb' must exist;
--    otherwise create it before running installcheck-resgroup-v2.
-- 2. 'gpconfig -c gp_resource_group_cgroup_parent -v "gpdb" && gpstop -rai'
--    must run before 'gpconfig -c gp_resource_manager -v group-v2', because
--    while gp_resource_manager is being set to group-v2 the system checks
--    whether the directory
--    '/sys/fs/cgroup/$gp_resource_group_cgroup_parent' exists.
-- start_ignore
! gpconfig -c gp_resource_group_cgroup_parent -v "gpdb";
! gpstop -rai;
! gpconfig -c gp_resource_manager -v group-v2;
! gpconfig -c max_connections -v 250 -m 25;
! gpconfig -c runaway_detector_activation_percent -v 100;
! gpstop -rai;
-- end_ignore
-- After the restart a new connection is needed to run the queries.
-- Show the two settings that were changed with gpconfig before the restart.
0: SHOW gp_resource_manager;
0: SHOW gp_resource_group_cgroup_parent;
-- Querying the resource queue statistics must not crash.
0: SELECT * FROM pg_resqueue_status;
0: SELECT * FROM gp_toolkit.gp_resqueue_status;
0: SELECT * FROM gp_toolkit.gp_resq_priority_backend;
-- Verify the default resource group settings.
0: SELECT * from gp_toolkit.gp_resgroup_config;
-- Verify that the cpu.weight files under /sys/fs/cgroup/gpdb agree with the
-- resource group configuration:
--   * the top-level gpdb/cpu.weight must equal
--     100 * gp_resource_group_cpu_priority;
--   * gpdb/<oid>/cpu.weight of default_group, admin_group, system_group,
--     rg1_cpu_test and rg2_cpu_test must equal int(value * 1024 / 100), where
--     value is the group's pg_resgroupcapability row with reslimittype 3.
-- @return bool: true when every check passes; a failing check raises an error
--               (assert) instead of returning false
0: CREATE OR REPLACE FUNCTION check_cgroup_configuration() RETURNS BOOL AS $$
    import os

    root = '/sys/fs/cgroup/'

    def get_cgroup_prop(prop):
        # Read the first line of a cgroup interface file as an integer.
        # The file is closed again right away instead of being left open.
        fullpath = os.path.join(root, prop)
        with open(fullpath) as f:
            return int(f.readline())

    def show_guc(guc):
        # Return the current value of a GUC as reported by SHOW.
        return plpy.execute('SHOW {}'.format(guc))[0][guc]

    # get top-level cgroup props
    shares = get_cgroup_prop('gpdb/cpu.weight')

    # get global gucs
    gp_resource_group_cpu_priority = int(show_guc('gp_resource_group_cpu_priority'))

    # shares := 100 * gp_resource_group_cpu_priority
    assert shares == 100 * gp_resource_group_cpu_priority

    def check_group_shares(name):
        # Compare the cpu.weight of one resource group's cgroup with the
        # value stored in the catalog for that group.
        cpu_weight = int(plpy.execute('''
            SELECT c.value
              FROM pg_resgroupcapability c
             INNER JOIN pg_resgroup g
                ON c.resgroupid = g.oid
             WHERE c.reslimittype = 3
               AND g.rsgname = '{}'
        '''.format(name))[0]['value'])

        oid = int(plpy.execute('''
            SELECT oid FROM pg_resgroup WHERE rsgname='{}'
        '''.format(name))[0]['oid'])

        # the per-group cgroup directory is named after the group oid
        sub_shares = get_cgroup_prop('gpdb/{}/cpu.weight'.format(oid))
        assert sub_shares == int(cpu_weight * 1024 / 100)

    # check default groups
    check_group_shares('default_group')
    check_group_shares('admin_group')
    check_group_shares('system_group')

    # check user groups
    check_group_shares('rg1_cpu_test')
    check_group_shares('rg2_cpu_test')

    return True
$$ LANGUAGE plpython3u;
-- Check whether the queries of a resource group run only on the given cores.
-- The process list is sampled with ps 1000 times, 10ms apart.
-- @param grp: name of the resource group the queries are running in
-- @param cpuset: cores the queries are allowed to run on, e.g. 0,1 or 0-3.
--                An empty string means the cores listed in
--                /sys/fs/cgroup/gpdb/cpuset.cpus
-- @return bool: false as soon as a matching process is seen on another core,
--               true otherwise
0: CREATE OR REPLACE FUNCTION check_cpuset(grp TEXT, cpuset TEXT) RETURNS BOOL AS $$
    import subprocess
    import time
    import re

    # Capture only the digits of the conNNN tag, so that the extracted ids
    # are comparable with the sess_id values read from pg_stat_activity
    # (which are plain numbers without the con prefix).
    pt = re.compile(r'con(\d+)')

    def check(expect_cpus, sess_ids):
        # use ps -eF to find all processes which belongs to postgres and in the given sessions
        procs = subprocess.check_output(['ps', '-eF']).decode().split('\n')
        head, proc_stats = procs[0], procs[1:]
        # position of the PSR column in the ps header
        PSR = head.split().index('PSR')
        cpus = []
        for proc_stat in proc_stats:
            if 'postgres' not in proc_stat:
                continue
            proc_sess_ids = set(pt.findall(proc_stat))
            # NOTE(review): issubset only matches a process whose command
            # line carries every session id of the group, so a group with
            # more than one session is presumably never checked. Confirm
            # whether matching any one of the ids was intended.
            if proc_sess_ids and sess_ids.issubset(proc_sess_ids):
                cpus.append(proc_stat.split()[PSR].strip())
        return set(cpus).issubset(set(expect_cpus))

    def get_all_sess_ids_in_group(group_name):
        sql = "select sess_id from pg_stat_activity where rsgname = '%s'" % group_name
        result = plpy.execute(sql)
        return set([str(r['sess_id']) for r in result])

    conf = cpuset
    if conf == '':
        # fall back to the cores of the top-level gpdb cgroup
        with open("/sys/fs/cgroup/gpdb/cpuset.cpus") as fd:
            conf = fd.readline().strip('\n')

    # expand the cpuset string (single cores and first-last ranges) into a
    # list of core numbers kept as strings, the same form ps prints them in
    expect_cpu = []
    for token in conf.split(","):
        if '-' in token:
            interval = token.split("-")
            for num in range(int(interval[0]), int(interval[1]) + 1):
                expect_cpu.append(str(num))
        else:
            expect_cpu.append(token)

    sess_ids = get_all_sess_ids_in_group(grp)

    for i in range(1000):
        time.sleep(0.01)
        if not check(expect_cpu, sess_ids):
            return False

    return True
$$ LANGUAGE plpython3u;
-- Create a resource group whose cpuset is everything listed in
-- /sys/fs/cgroup/gpdb/cpuset.cpus, i.e. all the cpu cores of the gpdb cgroup.
-- @param grp: name of the resource group to create
-- @return bool: true when /sys/fs/cgroup/gpdb/1/cpuset.cpus reads 0 after the
--               group has been created, false otherwise; a failing psql call
--               is reported through plpy.error
0: CREATE OR REPLACE FUNCTION create_allcores_group(grp TEXT) RETURNS BOOL AS $$
    import subprocess

    with open("/sys/fs/cgroup/gpdb/cpuset.cpus") as fd:
        line = fd.readline().strip('\n')

    sql = "create resource group " + grp + " with (" + "cpuset='" + line + "')"

    # plpy SPI will always start a transaction, but res group cannot be created in a transaction.
    # stderr is captured as well, otherwise the error message below could
    # never show what psql complained about.
    ret = subprocess.run(['psql', 'postgres', '-c', sql],
                         stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if ret.returncode != 0:
        plpy.error('failed to create resource group.\n {} \n {}'.format(
            ret.stdout.decode(errors='replace'),
            ret.stderr.decode(errors='replace')))

    # NOTE(review): gpdb/1 is presumably the cgroup of the default cpuset
    # group, which is expected to keep only core 0. Confirm.
    with open("/sys/fs/cgroup/gpdb/1/cpuset.cpus") as fd:
        line = fd.readline().strip('\n')

    return line == "0"
$$ LANGUAGE plpython3u;
-- Check that the cpuset values found in the cgroup files follow the rules:
--   * for every group in gp_toolkit.gp_resgroup_config whose cpuset is not
--     '-1', the configured cpuset equals gpdb/<groupid>/cpuset.cpus, and no
--     two such groups share a core;
--   * the cores of gpdb/1 together with the cores of those groups are exactly
--     the cores of gpdb/cpuset.cpus;
--   * when gpdb/1 shares a core with those groups it must hold exactly one
--     core.
-- @return bool: true when all rules hold, false otherwise
0: CREATE OR REPLACE FUNCTION check_cpuset_rules() RETURNS BOOL AS $$
    def expand_cpuset(text):
        # Turn a cpuset string such as 0-2,5 into a set of core numbers
        # (kept as strings). An empty string gives an empty set.
        text = text.strip('\n')
        if not text:
            return set()
        cores = set()
        for part in text.split(","):
            if '-' in part:
                bounds = part.split("-")
                cores.update(str(n) for n in range(int(bounds[0]), int(bounds[1]) + 1))
            else:
                cores.add(part)
        return cores

    def read_cgroup_cpuset(group):
        # Group 0 stands for the top-level gpdb cgroup, every other value
        # names a sub-directory of it.
        group = str(group)
        if group == '0':
            path = "/sys/fs/cgroup/gpdb/cpuset.cpus"
        else:
            path = "/sys/fs/cgroup/gpdb/" + group + "/cpuset.cpus"
        with open(path) as fd:
            first_line = fd.readline()
        return expand_cpuset(first_line)

    rows = plpy.execute("select groupid,cpuset from gp_toolkit.gp_resgroup_config where cpuset != '-1'")

    # configured cpuset and cgroup cpuset must be equal, and the groups must
    # not overlap each other
    taken_cores = set()
    for row in rows:
        from_config = expand_cpuset(row['cpuset'])
        from_cgroup = read_cgroup_cpuset(row['groupid'])
        if taken_cores & from_cgroup:
            return False
        taken_cores |= from_cgroup
        if from_config != from_cgroup:
            return False

    # default group plus all the cpuset groups must cover exactly all cores
    default_cores = read_cgroup_cpuset(1)
    all_cores = read_cgroup_cpuset(0)
    if (default_cores | taken_cores) != all_cores:
        return False

    # if all the cores are allocated to resource groups, the default group
    # must be left with a single core
    if default_cores & taken_cores:
        if len(default_cores) != 1 or not default_cores.issubset(all_cores):
            return False

    return True
$$ LANGUAGE plpython3u;
-- Check, on every host of gp_segment_configuration, that all postgres
-- processes of a session are listed in the cgroup.procs file of a resource
-- group (/sys/fs/cgroup/gpdb/<groupid>/queries/cgroup.procs).
-- The hosts are reached over ssh.
-- @param pid: pid of a backend, used to look up its sess_id in pg_stat_activity
-- @param groupname: name of the resource group
-- @return bool: true when the session's processes are a subset of the cgroup's
--               processes on every host, false otherwise; a failing ssh
--               command raises an error
0: CREATE OR REPLACE FUNCTION is_session_in_group(pid integer, groupname text) RETURNS BOOL AS $$
    import subprocess

    sql = "select sess_id from pg_stat_activity where pid = '%d'" % pid
    result = plpy.execute(sql)
    session_id = result[0]['sess_id']

    sql = "select groupid from gp_toolkit.gp_resgroup_config where groupname='%s'" % groupname
    result = plpy.execute(sql)
    groupid = result[0]['groupid']

    sql = "select hostname from gp_segment_configuration group by hostname"
    result = plpy.execute(sql)
    hosts = [row['hostname'] for row in result]

    def get_result(host):
        # grep -w matches the session tag as a whole word, so that looking
        # for con12 does not also pick up the processes of con123
        list_pids = "ps -ef | grep postgres | grep -w con{} | grep -v grep | awk '{{print $2}}'".format(session_id)
        stdout = subprocess.run(["ssh", "{}".format(host), list_pids],
                                stdout=subprocess.PIPE, check=True).stdout
        session_pids = stdout.splitlines()

        path = "/sys/fs/cgroup/gpdb/{}/queries/cgroup.procs".format(groupid)
        stdout = subprocess.run(["ssh", "{}".format(host), "cat {}".format(path)],
                                stdout=subprocess.PIPE, check=True).stdout
        cgroups_pids = stdout.splitlines()

        # both lists hold the pids as bytes, one per output line
        return set(session_pids).issubset(set(cgroups_pids))

    for host in hosts:
        if not get_result(host):
            return False
    return True
$$ LANGUAGE plpython3u;
-- Check whether the io.max file of a resource group contains the expected
-- limit line for the block device that backs a tablespace.
-- The device is resolved through the server's own C functions, called via
-- ctypes from inside the backend process.
-- @param groupname: name of the resource group
-- @param tablespace_name: name of the tablespace whose device is looked up
-- @param parameters: expected text after the major:minor device number
-- @return bool: true when a line of /sys/fs/cgroup/gpdb/<groupid>/io.max is
--               exactly "<major>:<minor> <parameters>", false otherwise or
--               when the tablespace has no path
0: CREATE OR REPLACE FUNCTION check_cgroup_io_max(groupname text, tablespace_name text, parameters text) RETURNS BOOL AS $$
    import ctypes
    import os

    postgres = ctypes.CDLL(None)
    get_bdi_of_path = postgres['get_bdi_of_path']
    get_tablespace_path = postgres['get_tablespace_path']
    get_tablespace_oid = postgres['get_tablespace_oid']

    # ctypes assumes a C int return value unless told otherwise, which would
    # cut a 64-bit char pointer down to 32 bits. Declare the real type, the
    # result then arrives as bytes (or None for a NULL pointer).
    get_tablespace_path.restype = ctypes.c_char_p
    # NOTE(review): get_bdi_of_path and get_tablespace_oid keep the default
    # int return type. Confirm that fits their C declarations.

    # get group oid
    sql = "select groupid from gp_toolkit.gp_resgroup_config where groupname = '%s'" % groupname
    result = plpy.execute(sql)
    groupid = result[0]['groupid']

    cgroup_path = "/sys/fs/cgroup/gpdb/%d" % groupid

    # get path of tablespace
    spcoid = get_tablespace_oid(tablespace_name.encode('utf-8'), False)
    location = get_tablespace_path(spcoid)

    # location is bytes, so it has to be tested for emptiness rather than
    # compared with a str (bytes never compare equal to str)
    if not location:
        return False

    bdi = get_bdi_of_path(location)
    major = os.major(bdi)
    minor = os.minor(bdi)

    match_string = "{}:{} {}".format(major, minor, parameters)

    with open(os.path.join(cgroup_path, "io.max")) as f:
        return any(line.strip() == match_string for line in f)
$$ LANGUAGE plpython3u;
-- Create a directory together with any missing parent directories.
-- @param dirname: path of the directory to create
-- @return bool: true when the path already exists or has been created; any
--               other failure is reported through plpy.error
0: CREATE OR REPLACE FUNCTION mkdir(dirname text) RETURNS BOOL AS $$
    import os

    # nothing to do when the path is already there
    if not os.path.exists(dirname):
        try:
            os.makedirs(dirname)
        except FileExistsError:
            # the path appeared between the check above and makedirs,
            # which counts as success
            pass
        except Exception as e:
            plpy.error("cannot create dir {}".format(e))

    return True
$$ LANGUAGE plpython3u;
-- Remove a directory tree while holding an exclusive flock on the directory.
-- @param dirname: path of the directory to remove
-- @return bool: true when the directory does not exist (any more) or has been
--               removed; a failing removal is reported through plpy.error
0: CREATE OR REPLACE FUNCTION rmdir(dirname text) RETURNS BOOL AS $$
    import shutil
    import fcntl
    import os

    try:
        f = os.open(dirname, os.O_RDONLY)
    except FileNotFoundError:
        return True

    # One try/finally around everything that follows the open, so that the
    # descriptor is closed on every path, including a failing flock.
    try:
        fcntl.flock(f, fcntl.LOCK_EX)

        # the directory may have been removed while waiting for the lock
        if not os.path.exists(dirname):
            return True

        try:
            shutil.rmtree(dirname)
        except Exception as e:
            plpy.error("cannot remove dir {}".format(e))
        return True
    finally:
        os.close(f)
$$ LANGUAGE plpython3u;
-- Call the server's clear_io_max C function for a resource group (via ctypes,
-- from inside the backend process) and check that the group's io.max file is
-- empty afterwards.
-- @param groupname: name of the resource group
-- @return bool: true when /sys/fs/cgroup/gpdb/<groupid>/io.max has no content
--               after the call, false otherwise
0: CREATE OR REPLACE FUNCTION check_clear_io_max(groupname text) RETURNS BOOL AS $$
    import ctypes

    postgres = ctypes.CDLL(None)
    clear_io_max = postgres['clear_io_max']

    # get group oid
    sql = "select groupid from gp_toolkit.gp_resgroup_config where groupname = '%s'" % groupname
    result = plpy.execute(sql)
    groupid = result[0]['groupid']

    clear_io_max(groupid)

    cgroup_path = "/sys/fs/cgroup/gpdb/%d/io.max" % groupid

    # cgroup interface files are kernel pseudo-files whose st_size is
    # reported as 0 whatever they contain, so the content itself has to be
    # read to find out whether the file is empty
    with open(cgroup_path) as f:
        return f.read().strip() == ''
$$ LANGUAGE plpython3u;
-- Check whether the io.max file of a resource group is empty.
-- @param groupname: name of the resource group
-- @return bool: true when /sys/fs/cgroup/gpdb/<groupid>/io.max has no
--               content, false otherwise
0: CREATE OR REPLACE FUNCTION check_io_max_empty(groupname text) RETURNS BOOL AS $$
    # get group oid
    sql = "select groupid from gp_toolkit.gp_resgroup_config where groupname = '%s'" % groupname
    result = plpy.execute(sql)
    groupid = result[0]['groupid']

    cgroup_path = "/sys/fs/cgroup/gpdb/%d/io.max" % groupid

    # cgroup interface files are kernel pseudo-files whose st_size is
    # reported as 0 whatever they contain, so the content itself has to be
    # read to find out whether the file is empty
    with open(cgroup_path) as f:
        return f.read().strip() == ''
$$ LANGUAGE plpython3u;