blob: 42893af232b6dbded117b6e29fde550b523da115 [file] [log] [blame]
/* ----------------------------------------------------------------------- *//**
*
* @file regression.sql_in
*
* @brief SQL functions for multi-linear and logistic regression
* @date January 2011
*
* @sa For a brief introduction to multi-linear regression, see the module
* description \ref grp_linreg. Likewise, for logistic regression, see the
* module description \ref grp_logreg.
*
*//* --------------------------------------------------------------------------
*
* This file is preprocessed with m4. Macro expansion can be turned off by
* enclosing text in <nom4> and </nom4>.
*/
changequote(`<nom4>', `</nom4>')
/**
@addtogroup grp_linreg
@about
Linear regression refers to a stochastic model, in which the conditional mean
of the dependent variable (usually denoted $y$) is an affine function of the
vector of independent variables (usually denoted \f$ \boldsymbol x \f$):
\f[
E[y \mid \boldsymbol x] = \boldsymbol c^T \boldsymbol x
\f]
for some unknown vector of coefficients \f$ \boldsymbol c \f$.
We fit the model with the ordinary-least-squares method. That is, the vector of
regression coefficients \f$ \boldsymbol c \f$ is estimated as:
\f[
\boldsymbol c = (X^T X)^+ X^T \boldsymbol y = X^+ \boldsymbol y
\f]
where
- $X$ is the design matrix with $k$ columns and $n$ rows, containing all
observed vectors of independent variables \f$ \boldsymbol x_i \f$ as rows
- $X^T$ denotes the transpose of $X$
- $X^+$ denotes the pseudo-inverse of $X$.
Note: The identity \f$ X^+ = (X^T X)^+ X^T \f$ holds for all matrices $X$. A
proof can be found, e.g., at:
http://en.wikipedia.org/wiki/Proofs_involving_the_Moore%2DPenrose_pseudoinverse
Computing the <b>total sum of squares</b> $TSS$, the <b>explained
sum of squares</b> $ESS$ (also called the regression sum of
squares), and the <b>residual sum of squares</b> $RSS$ (also called sum of
squared residuals or sum of squared errors of prediction) is
done according to the following formulas:
\f[\begin{align*}
ESS & = \boldsymbol y^T X \boldsymbol c
- \frac{ \| y \|_1^2 }{n} \\
TSS & = \sum_{i=1}^n y_i^2
- \frac{ \| y \|_1^2 }{n} \\
R^2 & = \frac{ESS}{TSS}
\end{align*}\f]
Note: The last equality follows from the definition
\f$ R^2 = 1 - \frac{RSS}{TSS} \f$ and the fact that for linear regression
$TSS = RSS + ESS$. A proof of the latter can be found, e.g., at:
http://en.wikipedia.org/wiki/Sum_of_squares
We estimate the variance
\f$ Var[y - \boldsymbol c^T \boldsymbol x \mid \boldsymbol x] \f$ as
\f[
\sigma^2 = \frac{RSS}{n - k}
\f]
and compute the t-statistic for coefficient $i$ as
\f[
t_i = \frac{c_i}{\sqrt{\sigma^2 \cdot \left( (X^T X)^{-1} \right)_{ii} }}
\f]
The $p$-value for coefficient $i$ is the probability, assuming the null
hypothesis ($c_i = 0$) is true, of observing a t-statistic at least as extreme
as $t_i$. Letting \f$ F_\nu \f$ denote the cumulative distribution function of
student-t with \f$ \nu \f$ degrees of freedom, the $p$-value for coefficient $i$
is therefore
\f[
p_i = P(|T| \geq |t_i|) = 2 \cdot (1 - F_{n - k}( |t_i| ))
\f]
where $T$ is a student-t distributed random variable with mean 0.
@prereq
Implemented in C for PostgreSQL/Greenplum.
@usage
-# The data set is expected to be of the following form:\n
<tt>{TABLE|VIEW} <em>sourceName</em> ([...] <em>dependentVariable</em>
DOUBLE PRECISION, <em>independentVariables</em> DOUBLE PRECISION[],
[...])</tt>
-# Run the linear regression by:\n
<tt>SELECT mregr_coef(<em>dependentVariable</em>,
<em>independentVariables</em>) FROM <em>sourceName</em></tt>\n
Note: In order to model an intercept, set one coordinate in the
<tt>independentVariables</tt> array to 1. (See below for an example.)
\n
-# The coefficient of determination (also denoted $R^2$), the vector of
t-statistics, and the vector of p-values can be determined likewise by
mregr_r2(), mregr_tstats(), mregr_pvalues().
@examp
The following example is taken from
http://www.stat.columbia.edu/~martin/W2110/SAS_7.pdf.
@verbatim
# select * from houses;
id | tax | bedroom | bath | price | size | lot
----+------+---------+------+--------+------+-------
1 | 590 | 2 | 1 | 50000 | 770 | 22100
2 | 1050 | 3 | 2 | 85000 | 1410 | 12000
3 | 20 | 3 | 1 | 22500 | 1060 | 3500
4 | 870 | 2 | 2 | 90000 | 1300 | 17500
5 | 1320 | 3 | 2 | 133000 | 1500 | 30000
6 | 1350 | 2 | 1 | 90500 | 820 | 25700
7 | 2790 | 3 | 2.5 | 260000 | 2130 | 25000
8 | 680 | 2 | 1 | 142500 | 1170 | 22000
9 | 1840 | 3 | 2 | 160000 | 1500 | 19000
10 | 3680 | 4 | 2 | 240000 | 2790 | 20000
11 | 1660 | 3 | 1 | 87000 | 1030 | 17500
12 | 1620 | 3 | 2 | 118600 | 1250 | 20000
13 | 3100 | 3 | 2 | 140000 | 1760 | 38000
14 | 2070 | 2 | 3 | 148000 | 1550 | 14000
15 | 650 | 3 | 1.5 | 65000 | 1450 | 12000
(15 rows)
# select mregr_coef(price, array[1, bedroom, bath, size])::REAL[] from houses;
mregr_coef
------------------------------------
{27923.4,-35524.8,2269.34,130.794}
(1 row)
# select mregr_r2(price, array[1, bedroom, bath, size])::REAL from houses;
mregr_r2
----------
0.745374
(1 row)
# select mregr_tstats(price, array[1, bedroom, bath, size])::REAL[] from houses;
mregr_tstats
--------------------------------------
{0.495919,-1.41891,0.102183,3.61223}
(1 row)
# select mregr_pvalues(price, array[1, bedroom, bath, size])::REAL[] from houses;
mregr_pvalues
-----------------------------------------
{0.629711,0.183633,0.920451,0.00408159}
(1 row)
@endverbatim
@sa file regression.sql_in (documenting the SQL functions)
@internal
@sa file regress.c (documenting the implementation in C), function
float8_mregr_compute() (documenting the formulas used for coefficients,
$R^2$, t-statistics, and p-values, implemented in C)
@endinternal
@literature
[1] Cosma Shalizi: Statistics 36-350: Data Mining, Lecture Notes, 21 October
2009, http://www.stat.cmu.edu/~cshalizi/350/lectures/17/lecture-17.pdf
*/
/**
 * @internal
 * @brief Transition function for the mregr_* aggregates: folds one observation
 *     (dependent value <tt>y</tt>, independent vector <tt>x</tt>) into the
 *     running aggregation state array. Implemented in C (MODULE_PATHNAME).
 */
CREATE FUNCTION MADLIB_SCHEMA.float8_mregr_accum(state DOUBLE PRECISION[], y DOUBLE PRECISION, x DOUBLE PRECISION[])
RETURNS DOUBLE PRECISION[]
AS 'MODULE_PATHNAME'
LANGUAGE C
IMMUTABLE STRICT;
/**
 * @internal
 * @brief Merge two partial mregr transition states into one. Used as the
 *     Greenplum <tt>prefunc</tt> of the mregr_* aggregates (see the
 *     m4 <tt>ifdef(GREENPLUM,...)</tt> lines below) to combine per-segment
 *     partial aggregates.
 */
-- NOTE(review): this is the only definition in this file using CREATE OR
-- REPLACE rather than plain CREATE; presumably intentional — verify against
-- the install scripts.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.float8_mregr_combine(state1 DOUBLE PRECISION[], state2 DOUBLE PRECISION[])
RETURNS DOUBLE PRECISION[]
AS 'MODULE_PATHNAME'
LANGUAGE C
IMMUTABLE STRICT;
-- Final functions
/**
 * @internal
 * @brief Final function of mregr_coef(): compute the coefficient vector
 *     \f$ \boldsymbol c \f$ from the accumulated transition state.
 */
CREATE FUNCTION MADLIB_SCHEMA.float8_mregr_coef(DOUBLE PRECISION[])
RETURNS DOUBLE PRECISION[]
AS 'MODULE_PATHNAME'
LANGUAGE C STRICT;
/**
 * @internal
 * @brief Final function of mregr_r2(): compute the coefficient of
 *     determination $R^2$ from the accumulated transition state.
 */
CREATE FUNCTION MADLIB_SCHEMA.float8_mregr_r2(DOUBLE PRECISION[])
RETURNS DOUBLE PRECISION
AS 'MODULE_PATHNAME'
LANGUAGE C STRICT;
/**
 * @internal
 * @brief Final function of mregr_tstats(): compute the per-coefficient
 *     t-statistics from the accumulated transition state.
 */
CREATE FUNCTION MADLIB_SCHEMA.float8_mregr_tstats(DOUBLE PRECISION[])
RETURNS DOUBLE PRECISION[]
AS 'MODULE_PATHNAME'
LANGUAGE C STRICT;
/**
 * @internal
 * @brief Final function of mregr_pvalues(): compute the per-coefficient
 *     p-values from the accumulated transition state.
 */
CREATE FUNCTION MADLIB_SCHEMA.float8_mregr_pvalues(DOUBLE PRECISION[])
RETURNS DOUBLE PRECISION[]
AS 'MODULE_PATHNAME'
LANGUAGE C STRICT;
/**
 * @brief Compute multi-linear regression coefficients.
 *
 * To include an intercept in the model, set one coordinate in the
 * <tt>independentVariables</tt> array to 1.
 *
 * @return Array of coefficients, which has the same length as the array of
 * independent variables.
 *
 * @examp <tt>SELECT mregr_coef(y, [1, x1, x2]) FROM data;</tt>
 *
 * @note On Greenplum, per-segment partial states are merged by the
 * m4-conditional <tt>prefunc</tt> below. <tt>INITCOND='{0}'</tt> is the
 * serialized empty state expected by float8_mregr_accum.
 */
CREATE AGGREGATE MADLIB_SCHEMA.mregr_coef(
/*+ "dependentVariable" */ DOUBLE PRECISION,
/*+ "independentVariables" */ DOUBLE PRECISION[]) (
SFUNC=MADLIB_SCHEMA.float8_mregr_accum,
STYPE=float8[],
FINALFUNC=MADLIB_SCHEMA.float8_mregr_coef,
ifdef(<nom4>GREENPLUM</nom4>,<nom4>prefunc=MADLIB_SCHEMA.float8_mregr_combine,</nom4>)
INITCOND='{0}'
);
/**
 * @brief Compute the coefficient of determination, $R^2$.
 *
 * Takes the same (dependentVariable, independentVariables) inputs as
 * mregr_coef() and shares its transition function and state layout; only the
 * final function differs.
 */
CREATE AGGREGATE MADLIB_SCHEMA.mregr_r2(
/*+ "dependentVariable" */ DOUBLE PRECISION,
/*+ "independentVariables" */ DOUBLE PRECISION[]) (
SFUNC=MADLIB_SCHEMA.float8_mregr_accum,
STYPE=float8[],
FINALFUNC=MADLIB_SCHEMA.float8_mregr_r2,
ifdef(<nom4>GREENPLUM</nom4>,<nom4>prefunc=MADLIB_SCHEMA.float8_mregr_combine,</nom4>)
INITCOND='{0}'
);
/**
 * @brief Compute the vector of t-statistics, for every coefficient.
 *
 * To include an intercept in the model, set one coordinate in the
 * independentVariables array to 1.
 *
 * @param dependentVariable Dependent variable
 * @param independentVariables Array of independent variables
 * @return Array of t-statistics for each coefficient. The returned array has
 * the same length as the array of independent variables.
 *
 * @note Shares the transition function and state layout with mregr_coef();
 * only the final function differs.
 */
CREATE AGGREGATE MADLIB_SCHEMA.mregr_tstats(
/*+ "dependentVariable" */ DOUBLE PRECISION,
/*+ "independentVariables" */ DOUBLE PRECISION[]) (
SFUNC=MADLIB_SCHEMA.float8_mregr_accum,
STYPE=float8[],
FINALFUNC=MADLIB_SCHEMA.float8_mregr_tstats,
ifdef(<nom4>GREENPLUM</nom4>,<nom4>prefunc=MADLIB_SCHEMA.float8_mregr_combine,</nom4>)
INITCOND='{0}'
);
/**
 * @brief Compute the vector of p-values, for every coefficient.
 *
 * @param dependentVariable Dependent variable
 * @param independentVariables Array of independent variables
 * @return Array of p-values for each coefficient. The returned array has
 * the same length as the array of independent variables.
 *
 * @note Shares the transition function and state layout with mregr_coef();
 * only the final function differs.
 */
CREATE AGGREGATE MADLIB_SCHEMA.mregr_pvalues(
/*+ "dependentVariable" */ DOUBLE PRECISION,
/*+ "independentVariables" */ DOUBLE PRECISION[]) (
SFUNC=MADLIB_SCHEMA.float8_mregr_accum,
STYPE=float8[],
FINALFUNC=MADLIB_SCHEMA.float8_mregr_pvalues,
ifdef(<nom4>GREENPLUM</nom4>,<nom4>prefunc=MADLIB_SCHEMA.float8_mregr_combine,</nom4>)
INITCOND='{0}'
);
/**
 * @brief Student-t cumulative distribution function.
 *
 * Used for converting t-statistics into p-values (see module description).
 *
 * @param nu Degree of freedom >= 1.
 * @param x Point at which to evaluate the CDF.
 * @return \f$ F_\nu(x) \f$
 *
 * @note Behavior for nu < 1 is determined by the C implementation — not
 * visible from this declaration.
 */
CREATE FUNCTION MADLIB_SCHEMA.student_t_cdf(nu INTEGER, x DOUBLE PRECISION)
RETURNS DOUBLE PRECISION
AS 'MODULE_PATHNAME'
LANGUAGE C
IMMUTABLE STRICT;
/**
@addtogroup grp_logreg
@about
Logistic regression is used to estimate probabilities of a dependent binary
variable, by fitting a stochastic model. It is one of the most commonly used
tools for applied statistics and data mining [1].
Logistic regression assumes a generalized linear model:
\f[
E[Y] = g^{-1}(\boldsymbol c^T X)
\f]
where:
- $Y$ is the dependent variable
- \f$\boldsymbol c^T X\f$ is the linear predictor
- \f$g(x) = \ln\left( \frac{x}{1-x} \right)\f$ is the link function, with
inverse \f$\sigma(x) := g^{-1}(x) = \frac{1}{1 + \exp(-x)} \f$
For each training data point $i$, we have a vector of
features $x_i$ and an observed class $y_i$. For ease of notation, let $Z$ be a
dependent random variable such that $Z = -1$ if $Y = 0$ and $Z = 1$ if $Y = 1$,
i.e., \f$Z := 2(Y - \frac 12)\f$. By definition,
\f$P[Z = z_i \mid X = x_i] = \sigma(z_i \cdot \boldsymbol c^T x_i)\f$.
Since logistic regression predicts probabilities, we can do maximum-likelihood
fitting: That is, we want the vector of regression coefficients
\f$\boldsymbol c\f$ to maximize
\f[
\prod_{i=1}^n \sigma(z_i \cdot \boldsymbol c^T \boldsymbol x_i)
\f]
or, equivalently, to maximize the objective
\f[
l(\boldsymbol c) =
-\sum_{i=1}^n \ln(1 + \exp(-z_i \cdot \boldsymbol c^T \boldsymbol x_i))
\f]
By looking at the Hessian, we can verify that \f$l(\boldsymbol c)\f$ is convex.
There are many techniques for solving convex optimization problems. Currently,
logistic regression in MADlib can use one of two algorithms:
- Iteratively Reweighted Least Squares
- A conjugate-gradient approach, also known as Fletcher-Reeves method in the
literature, where we use the Hestenes-Stiefel rule for calculating the step
size.
@prereq
Implemented in C (the computation) and Python (the driver/outer loop) for
PostgreSQL/Greenplum.
@usage
-# The training data is expected to be of the following form:\n
<tt>{TABLE|VIEW} <em>sourceName</em> ([...] <em>dependentVariable</em>
BOOLEAN, <em>independentVariables</em> DOUBLE PRECISION[], [...])</tt>
-# Run the logistic regression by:\n
<tt>SELECT logregr_coef('<em>sourceName</em>', '<em>dependentVariable</em>',
'<em>independentVariables</em>', <em>numIterations</em>,
'<em>optimizer</em>', <em>precision</em>)</tt>\n
The last three arguments are optional and can be omitted, in which case
default values will be used. See logregr_coef().\n
Note: In order to model an intercept, set one coordinate in the
<tt>independentVariables</tt> array to 1.
@examp
@verbatim
# select * from artificiallogreg;
y | x
---+-------------------------------------------------------
t | {-1.19845,1.15366,0.941779,-0.23669,-0.711024}
f | {-0.0680205,-0.716733,-0.149781,-0.410448,-0.0843123}
f | {-0.330021,0.222596,-0.976091,0.773816,-1.06238}
f | {0.648293,0.286225,0.524144,-0.141286,-1.41774}
f | {0.859484,-0.412929,-0.273368,-0.243059,0.714789}
[...]
# select madlib.logregr_coef(
# 'artificiallogreg', 'y', 'x', 20, 'irls', 0.001
# )::REAL[];
logregr_coef
---------------------------------------------
{-3.0307,3.63312,0.714105,-1.72496,1.37484}
@endverbatim
@sa file regression.sql_in (documenting the SQL functions)
@internal
@sa namespace logRegress (documenting the driver/outer loop implemented in
Python), function float8_cg_update_final() (documenting the
conjugate-gradient update/iteration steps, implemented in C), function
float8_cg_update_accum() (documenting the
iteratively-reweighted-least-squares update/iteration steps, implemented in
C)
@endinternal
@literature
A somewhat random selection of nice write-ups, with valuable pointers into
further literature:
[1] Cosma Shalizi: Statistics 36-350: Data Mining, Lecture Notes, 18 November
2009, http://www.stat.cmu.edu/~cshalizi/350/lectures/26/lecture-26.pdf
[2] Thomas P. Minka: A comparison of numerical optimizers for logistic
regression, 2003 (revised Mar 26, 2007),
http://research.microsoft.com/en-us/um/people/minka/papers/logreg/minka-logreg.pdf
[3] Paul Komarek, Andrew W. Moore: Making Logistic Regression A Core Data Mining
Tool With TR-IRLS, IEEE International Conference on Data Mining 2005,
pp. 685-688, http://komarix.org/ac/papers/tr-irls.short.pdf
*/
/**
 * @internal
 * @brief Inter-iteration state of the conjugate-gradient optimizer for
 *     logistic regression. Produced/consumed by float8_cg_update_accum() and
 *     float8_cg_update_final(); field semantics are defined by the C code.
 */
CREATE TYPE MADLIB_SCHEMA.logregr_cg_state AS (
iteration INTEGER, -- iteration counter
len INTEGER, -- number of coefficients (length of the arrays below)
coef DOUBLE PRECISION[], -- current coefficient vector c
dir DOUBLE PRECISION[], -- current search direction
grad DOUBLE PRECISION[], -- gradient at the current coefficients
beta DOUBLE PRECISION, -- CG update scalar (presumably Hestenes-Stiefel; see C code)
count BIGINT, -- number of rows processed
gradNew DOUBLE PRECISION[], -- gradient for the next iterate
dTHd DOUBLE PRECISION, -- presumably d^T H d (Hessian quadratic form) — see C code
logLikelihood DOUBLE PRECISION -- objective value l(c)
);
/**
 * @internal
 * @brief Inter-iteration state of the IRLS optimizer for logistic regression:
 *     the coefficient vector plus the log-likelihood used by
 *     logregr_should_terminate() to test convergence.
 */
CREATE TYPE MADLIB_SCHEMA.logregr_irls_state AS (
coef DOUBLE PRECISION[], -- current coefficient vector c
logLikelihood DOUBLE PRECISION -- objective value l(c)
);
/**
 * @internal
 * @brief Transition function of logregr_cg_step(): fold one row
 *     (class BOOLEAN, feature vector) into the CG state. The trailing
 *     logregr_cg_state argument carries the previous iteration's result.
 *     Deliberately not STRICT: logregr_cg_step has no INITCOND, so the state
 *     is NULL on the first call and the C code must handle that.
 */
CREATE FUNCTION MADLIB_SCHEMA.float8_cg_update_accum(
MADLIB_SCHEMA.logregr_cg_state,
BOOLEAN,
DOUBLE PRECISION[],
MADLIB_SCHEMA.logregr_cg_state)
RETURNS MADLIB_SCHEMA.logregr_cg_state
AS 'MODULE_PATHNAME'
LANGUAGE C;
/**
 * @internal
 * @brief Transition function of logregr_irls_step(): fold one row
 *     (class BOOLEAN, feature vector) into a flat DOUBLE PRECISION[] state.
 *     The trailing logregr_irls_state argument carries the previous
 *     iteration's result. Deliberately not STRICT so NULL inputs can be
 *     handled by the C code.
 */
CREATE FUNCTION MADLIB_SCHEMA.float8_irls_update_accum(
DOUBLE PRECISION[],
BOOLEAN,
DOUBLE PRECISION[],
MADLIB_SCHEMA.logregr_irls_state)
RETURNS DOUBLE PRECISION[]
AS 'MODULE_PATHNAME'
LANGUAGE C;
/**
 * @internal
 * @brief Final function of logregr_cg_step(): perform the conjugate-gradient
 *     update step on the accumulated state and return the new state.
 */
CREATE FUNCTION MADLIB_SCHEMA.float8_cg_update_final(MADLIB_SCHEMA.logregr_cg_state)
RETURNS MADLIB_SCHEMA.logregr_cg_state
AS 'MODULE_PATHNAME'
LANGUAGE C STRICT;
/**
 * @internal
 * @brief Final function of logregr_irls_step(): convert the flat transition
 *     state into a logregr_irls_state (new coefficients + log-likelihood).
 */
CREATE FUNCTION MADLIB_SCHEMA.float8_irls_update_final(DOUBLE PRECISION[])
RETURNS MADLIB_SCHEMA.logregr_irls_state
AS 'MODULE_PATHNAME'
LANGUAGE C STRICT;
/**
 * @internal
 * @brief Perform one iteration of the conjugate-gradient method for computing
 * logistic regression
 *
 * The third argument carries the previous iteration's state (NULL on the
 * first iteration — there is no INITCOND, and the non-strict SFUNC handles
 * the NULL start state). The driver loop lives in Python (logRegress).
 */
CREATE AGGREGATE MADLIB_SCHEMA.logregr_cg_step(
BOOLEAN,
DOUBLE PRECISION[],
MADLIB_SCHEMA.logregr_cg_state) (
SFUNC=MADLIB_SCHEMA.float8_cg_update_accum,
STYPE=MADLIB_SCHEMA.logregr_cg_state,
FINALFUNC=MADLIB_SCHEMA.float8_cg_update_final
);
/**
 * @internal
 * @brief Perform one iteration of the iteratively-reweighted-least-squares
 * method for computing logistic regression
 *
 * The third argument carries the previous iteration's state. The transition
 * state is a flat DOUBLE PRECISION array (the layout consumed and produced by
 * float8_irls_update_accum); the final function converts it into a
 * logregr_irls_state. INITCOND='{0}' is the serialized empty state.
 */
CREATE AGGREGATE MADLIB_SCHEMA.logregr_irls_step(
BOOLEAN,
DOUBLE PRECISION[],
MADLIB_SCHEMA.logregr_irls_state) (
SFUNC=MADLIB_SCHEMA.float8_irls_update_accum,
STYPE=float8[],
-- PREFUNC is Greenplum-only CREATE AGGREGATE syntax; guard it with the same
-- m4 conditional used by the mregr_* aggregates so this definition also loads
-- on plain PostgreSQL.
ifdef(<nom4>GREENPLUM</nom4>,<nom4>PREFUNC=MADLIB_SCHEMA.float8_mregr_combine,</nom4>)
FINALFUNC=MADLIB_SCHEMA.float8_irls_update_final,
INITCOND='{0}'
);
/**
 * @internal
 * @brief Convergence test for the iterative logistic-regression drivers.
 *     Arguments are presumably (old values, new values, criterion name,
 *     precision) — the exact semantics are defined in the C implementation;
 *     verify against logRegress.compute_logregr_coef().
 */
CREATE FUNCTION MADLIB_SCHEMA.logregr_should_terminate(
DOUBLE PRECISION[],
DOUBLE PRECISION[],
VARCHAR,
DOUBLE PRECISION)
RETURNS BOOLEAN
AS 'MODULE_PATHNAME'
LANGUAGE C STRICT;
-- begin functions for logistic-regression coefficients
-- We only need to document the last one (unfortunately, in Greenplum we have to
-- use function overloading instead of default arguments).
--
-- Each overload forwards all its named SQL parameters to the Python driver via
-- **globals() (PL/Python exposes parameters as globals, keyed by the quoted
-- parameter names). The try/except first attempts a normal import of
-- logRegress and falls back to PLPYTHON_LIBDIR (substituted at build time);
-- the m4 ifdef(DEBUG,...) switches between a plain and a package-qualified
-- import.
CREATE FUNCTION MADLIB_SCHEMA.logregr_coef(
"source" VARCHAR,
"depColumn" VARCHAR,
"indepColumn" VARCHAR)
RETURNS DOUBLE PRECISION[] AS $$
import sys
try:
ifdef(<nom4>DEBUG</nom4>,,<nom4>from madlib </nom4>)import logRegress
except:
sys.path.append("PLPYTHON_LIBDIR")
ifdef(<nom4>DEBUG</nom4>,,<nom4>from madlib </nom4>)import logRegress
return logRegress.compute_logregr_coef(**globals())
$$ LANGUAGE plpythonu VOLATILE;
-- Overload with explicit iteration limit; remaining parameters take the
-- defaults set in logRegress.compute_logregr_coef(). See the documented
-- six-argument overload below.
CREATE FUNCTION MADLIB_SCHEMA.logregr_coef(
"source" VARCHAR,
"depColumn" VARCHAR,
"indepColumn" VARCHAR,
"numIterations" INTEGER)
RETURNS DOUBLE PRECISION[] AS $$
import sys
try:
ifdef(<nom4>DEBUG</nom4>,,<nom4>from madlib </nom4>)import logRegress
except:
sys.path.append("PLPYTHON_LIBDIR")
ifdef(<nom4>DEBUG</nom4>,,<nom4>from madlib </nom4>)import logRegress
return logRegress.compute_logregr_coef(**globals())
$$ LANGUAGE plpythonu VOLATILE;
-- Overload with explicit iteration limit and optimizer choice; precision takes
-- the default set in logRegress.compute_logregr_coef(). See the documented
-- six-argument overload below.
CREATE FUNCTION MADLIB_SCHEMA.logregr_coef(
"source" VARCHAR,
"depColumn" VARCHAR,
"indepColumn" VARCHAR,
"numIterations" INTEGER,
"optimizer" VARCHAR)
RETURNS DOUBLE PRECISION[] AS $$
import sys
try:
ifdef(<nom4>DEBUG</nom4>,,<nom4>from madlib </nom4>)import logRegress
except:
sys.path.append("PLPYTHON_LIBDIR")
ifdef(<nom4>DEBUG</nom4>,,<nom4>from madlib </nom4>)import logRegress
return logRegress.compute_logregr_coef(**globals())
$$ LANGUAGE plpythonu VOLATILE;
/**
 * @brief Compute logistic-regression coefficients
 *
 * To include an intercept in the model, set one coordinate in the
 * <tt>independentVariables</tt> array to 1.
 *
 * @param source Name of the source relation containing the training data
 * @param depColumn Name of the dependent column (of type BOOLEAN)
 * @param indepColumn Name of the independent column (of type DOUBLE
 * PRECISION[])
 * @param numIterations The maximum number of iterations
 * @param optimizer The optimizer to use (either
 * <tt>'irls'</tt>/<tt>'newton'</tt> for iteratively reweighted least
 * squares or <tt>'cg'</tt> for conjugate gradient)
 * @param precision The difference between log-likelihood values in successive
 * iterations that should indicate convergence, or 0 indicating that
 * log-likelihood values should be ignored
 *
 * @note This function starts an iterative algorithm. It is not an aggregate
 * function. Source and column names have to be passed as strings (due to
 * limitations of the SQL syntax).
 *
 * @examp <tt>SELECT logregr_coef('data', 'y', 'array[1, x1, x2]', 20, 'cg',
 * 0.001);</tt>
 *
 * @internal
 * @sa This function is a wrapper for logRegress::compute_logregr_coef(), which
 * sets the default values.
 */
CREATE FUNCTION MADLIB_SCHEMA.logregr_coef(
"source" VARCHAR,
"depColumn" VARCHAR,
"indepColumn" VARCHAR,
"numIterations" INTEGER /*+ DEFAULT 20 */,
"optimizer" VARCHAR /*+ DEFAULT 'irls' */,
"precision" DOUBLE PRECISION /*+ DEFAULT 0.0001 */)
RETURNS DOUBLE PRECISION[] AS $$
import sys
try:
ifdef(<nom4>DEBUG</nom4>,,<nom4>from madlib </nom4>)import logRegress
except:
sys.path.append("PLPYTHON_LIBDIR")
ifdef(<nom4>DEBUG</nom4>,,<nom4>from madlib </nom4>)import logRegress
return logRegress.compute_logregr_coef(**globals())
$$ LANGUAGE plpythonu VOLATILE;
ifdef(<nom4>PGXS</nom4>,<nom4>
/*
 * Function for initializing python paths to the paths in dynamic_library_path.
 * This is only needed when debugging Python-based functions without installing
 * them in a location where Python would find them automatically.
 *
 * Only emitted for PGXS builds (the enclosing m4 ifdef). Paths listed before
 * "$libdir" in dynamic_library_path are inserted at the front of sys.path in
 * their original order; paths after "$libdir" are appended at the end.
 */
CREATE FUNCTION MADLIB_SCHEMA.init_python_paths()
RETURNS VOID AS
$$
# FIXME: The following code should be common code and not reside in a specialized module
import sys
dyld_paths = plpy.execute(
"SHOW dynamic_library_path")[0]["dynamic_library_path"].split(':')
before_default = True
count = 0
for path in dyld_paths:
if path == "$libdir":
before_default = False
else:
if before_default:
sys.path.insert(count, path)
count += 1
else:
sys.path.append(path)
$$ LANGUAGE plpythonu VOLATILE;
</nom4>)