)]}'
{
  "log": [
    {
      "commit": "d03af81fed814ef26dae2abf78f4a8f6eff71830",
      "tree": "a88b38b7f272796b053ebaa7679c8de94a61ac1c",
      "parents": [
        "b195300080853098b38609548ef99a10168ad382"
      ],
      "author": {
        "name": "Ed Espino",
        "email": "espino@apache.org",
        "time": "Tue Oct 28 23:25:14 2025 -0700"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Tue Oct 28 23:25:14 2025 -0700"
      },
      "message": "Update copyright year in NOTICE to 2025 (#629)\n\n"
    },
    {
      "commit": "b195300080853098b38609548ef99a10168ad382",
      "tree": "78155666f7cafce010fa5f54e705c65995082a59",
      "parents": [
        "ea6b535f45d36981552e2dba2b2a0e9b3bf68c6c"
      ],
      "author": {
        "name": "Ekta Khanna",
        "email": "ekhanna@vmware.com",
        "time": "Thu May 09 07:39:57 2024 -0700"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Sat May 11 04:10:09 2024 +0300"
      },
      "message": "Analyze to pick optimal plan for orca\n"
    },
    {
      "commit": "ea6b535f45d36981552e2dba2b2a0e9b3bf68c6c",
      "tree": "bcefde92b4a33691843678d08d5d6022d86701f7",
      "parents": [
        "9e32dd985043bb829cb1690b9c0493a71fc8796d"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Thu May 02 20:16:08 2024 -0400"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Sat May 11 04:10:09 2024 +0300"
      },
      "message": "DL: Use legacy optimizers\n\nWith the tf 2.12 version decay parameter got replaced. Now the user has\nto create a LearningRateSchedule object. While this should be possible\nto do inside the parameters, we use the legacy optimizers to keep\nthe existing notations possible as well.\n"
    },
    {
      "commit": "9e32dd985043bb829cb1690b9c0493a71fc8796d",
      "tree": "85290092a12fe9517c811fd164662acce20287a3",
      "parents": [
        "3f6500d32169ffeef058f3cf3cc3a3a9362e8fd8"
      ],
      "author": {
        "name": "Nikhil Kak",
        "email": "nkak@vmware.com",
        "time": "Fri Mar 01 16:21:50 2024 -0800"
      },
      "committer": {
        "name": "Ekta Khanna",
        "email": "ekhanna@pivotal.io",
        "time": "Mon Mar 04 14:48:02 2024 -0800"
      },
      "message": "PMML: Update user docs\n\nJIRA: MADLIB-1517\n\nStarting 0cd28f9733927d63beaefc9488db7f8bfdb3bd80, we no longer include\nintercept as a predictor in the pmml file. User docs need to be updated to\nincorporate these changes\n\nThis commit makes the following changes to the user docs:\n1. Remove the predictor variable \"1\" from the namespec expression\n2. Add a note about non array independent variable expressions\n"
    },
    {
      "commit": "3f6500d32169ffeef058f3cf3cc3a3a9362e8fd8",
      "tree": "93e69b52f737e50b5ef7c1f6cab36c339e8a9867",
      "parents": [
        "5b6f0033f1e43cad33acf8c30b303ef0d2f9da83"
      ],
      "author": {
        "name": "Nikhil Kak",
        "email": "nkak@vmware.com",
        "time": "Fri Mar 01 17:11:03 2024 -0800"
      },
      "committer": {
        "name": "Ekta Khanna",
        "email": "ekhanna@pivotal.io",
        "time": "Mon Mar 04 14:47:29 2024 -0800"
      },
      "message": "PMML: Add more namespec tests\n\nJIRA: MADLIB-1517\n\nThis commit adds a bit more coverage to the pmml tests by testing the pmml\nfunction with an ARRAY namespec instead of just text/string namespec formulaes\n"
    },
    {
      "commit": "5b6f0033f1e43cad33acf8c30b303ef0d2f9da83",
      "tree": "3a2a287ab98400ed9e2d4d9ff896f7afb00d0aff",
      "parents": [
        "b944045e624e791b6c41bca4ef5d56ba54d4bb68"
      ],
      "author": {
        "name": "Nikhil Kak",
        "email": "nkak@vmware.com",
        "time": "Fri Mar 01 10:59:43 2024 -0800"
      },
      "committer": {
        "name": "kaknikhil",
        "email": "nkak@vmware.com",
        "time": "Fri Mar 01 16:22:48 2024 -0800"
      },
      "message": "Update README to point to madlib2-master build\n"
    },
    {
      "commit": "b944045e624e791b6c41bca4ef5d56ba54d4bb68",
      "tree": "4fd1df209529033dda55991a099051a91446696b",
      "parents": [
        "0b75a0af162cf7c2e536bad3f92107f7748c325b"
      ],
      "author": {
        "name": "Nikhil Kak",
        "email": "nkak@vmware.com",
        "time": "Tue Feb 20 15:06:46 2024 -0800"
      },
      "committer": {
        "name": "kaknikhil",
        "email": "nkak@vmware.com",
        "time": "Fri Mar 01 16:22:48 2024 -0800"
      },
      "message": "PMML: Consider spaces when parsing the indep var\n\nJIRA: MADLIB-1517\n\nA previous commit 0cd28f9733927d63beaefc9488db7f8bfdb3bd80 added support to\nparse the independent var expression to determine if an intercept was used\nduring training. This commit improves the regex by adding support for spaces\nand also adds a detailed explanation for the regex\n\nThis commit also fixes a warning that would get generated with the previous regex:\n```\nre.compile(r\u0027array[[]([0-1],|[0-1].0,)?([\"a-z0-9_, .]+)[]]\u0027, flags\u003dre.I)\n\u003cstdin\u003e:1: FutureWarning: Possible nested set at position 6\n```\n"
    },
    {
      "commit": "0b75a0af162cf7c2e536bad3f92107f7748c325b",
      "tree": "87402e84bbff62539ebea18107104fed738f9e10",
      "parents": [
        "0c1cd4ff9387eb563fecacb0a5d4bb2fdc96ce16"
      ],
      "author": {
        "name": "Nikhil Kak",
        "email": "nkak@vmware.com",
        "time": "Mon Feb 26 23:17:01 2024 -0800"
      },
      "committer": {
        "name": "kaknikhil",
        "email": "nkak@vmware.com",
        "time": "Fri Mar 01 16:22:48 2024 -0800"
      },
      "message": "PMML: Separate out datasets and setup functions\n\nJIRA: MADLIB-1517\n\nThis commit separates out the setup function from the datasets used in the pmml\ndev-check tests. This is done to make it easier to just import the setup\nfunctions without having to also create the datasets which may not be used by\nall the modules like dt, rf etc.\n"
    },
    {
      "commit": "0c1cd4ff9387eb563fecacb0a5d4bb2fdc96ce16",
      "tree": "dfcfdcd24ed87363fd0a76fbeb7a80ae8dd936bc",
      "parents": [
        "769f758e4db0b4e88112e0d976f09194365768ed"
      ],
      "author": {
        "name": "Nikhil Kak",
        "email": "nkak@vmware.com",
        "time": "Thu Feb 22 16:05:43 2024 -0800"
      },
      "committer": {
        "name": "kaknikhil",
        "email": "nkak@vmware.com",
        "time": "Fri Mar 01 16:22:48 2024 -0800"
      },
      "message": "PMML: Add tests for intercept acting as a predictor\n\nJIRA: MADLIB-1517\n\nA previous commit 0cd28f9733927d63beaefc9488db7f8bfdb3bd80 made changes to the\npmml code so that the intercept won\u0027t be used as a predictor. But it\u0027s still\npossible that this assumption may not be true in some scenarios and the\nintercept might still be treated as a predictor in the pmml.\nFor e.g. consider this scenario:\nWhile using any of the regression algorithms, user passes the independent\nvariable as \"ARRAY[x1,1,x2] or ARRAY[x1,x2,1]\" instead of \"ARRAY[1,x1,x2]\"\nIn this scenario, the pmml code will assume that there isn\u0027t a intercept in\nthis expression and will treat \"1\" as a predictor.\nWhen predicting using this pmml, users will need to create a column/field named\n\"1\" which has the value 1 for each data row. The test added in this commit\nmimics this scenario\n"
    },
    {
      "commit": "769f758e4db0b4e88112e0d976f09194365768ed",
      "tree": "2d76b93e74839406ea1ca1fc06bf9e533a240beb",
      "parents": [
        "fa57c4fb40ce71f104449ec8356a3e0d53f97426"
      ],
      "author": {
        "name": "Nikhil Kak",
        "email": "nkak@vmware.com",
        "time": "Thu Feb 22 16:05:32 2024 -0800"
      },
      "committer": {
        "name": "kaknikhil",
        "email": "nkak@vmware.com",
        "time": "Fri Mar 01 16:22:48 2024 -0800"
      },
      "message": "PMML: Improve namespec dev-check tests\n\nJIRA: MADLIB-1517\n\nThis commit adds a few more namespec pmml tests\n"
    },
    {
      "commit": "fa57c4fb40ce71f104449ec8356a3e0d53f97426",
      "tree": "751303b254a6dcb42bce38af14141a61f7979b6d",
      "parents": [
        "12fb88877de26f6efd6777f6ac9fa12960e3658e"
      ],
      "author": {
        "name": "Nikhil Kak",
        "email": "nkak@vmware.com",
        "time": "Tue Feb 20 17:10:46 2024 -0800"
      },
      "committer": {
        "name": "kaknikhil",
        "email": "nkak@vmware.com",
        "time": "Fri Mar 01 16:22:48 2024 -0800"
      },
      "message": "PMML: Improve dev-check tests for random forest\n\nJIRA: MADLIB-1517\n\nThis commit adds a few more random forest pmml tests that compare\nforest_predict\u0027s output with pypmml\u0027s output\n"
    },
    {
      "commit": "12fb88877de26f6efd6777f6ac9fa12960e3658e",
      "tree": "a9e291c42e8e4b141b08fe62c617474a256d4e1b",
      "parents": [
        "43775de29c01d83e9d491e2e1357603ca611294d"
      ],
      "author": {
        "name": "Nikhil Kak",
        "email": "nkak@vmware.com",
        "time": "Tue Feb 20 16:18:15 2024 -0800"
      },
      "committer": {
        "name": "kaknikhil",
        "email": "nkak@vmware.com",
        "time": "Fri Mar 01 16:22:48 2024 -0800"
      },
      "message": "PMML: Improve dev-check tests for decision tree\n\nJIRA: MADLIB-1517\n\nThis commit adds a few more decision tree pmml tests that compare tree_predict\u0027s\noutput with pypmml\u0027s output\n"
    },
    {
      "commit": "43775de29c01d83e9d491e2e1357603ca611294d",
      "tree": "7a9d8876bbeb518399f2c629e9708a0070671136",
      "parents": [
        "c9987434d4bef4dc468ac9bbead456e5103b4693"
      ],
      "author": {
        "name": "Nikhil Kak",
        "email": "nkak@vmware.com",
        "time": "Tue Feb 20 15:06:07 2024 -0800"
      },
      "committer": {
        "name": "kaknikhil",
        "email": "nkak@vmware.com",
        "time": "Fri Mar 01 16:22:48 2024 -0800"
      },
      "message": "PMML: Improve ordinal dev-check tests\n\nJIRA: MADLIB-1517\n\nThis commit adds a few more ordinal pmml tests that compare ordinal_predict\u0027s\noutput with pypmml\u0027s output\n"
    },
    {
      "commit": "c9987434d4bef4dc468ac9bbead456e5103b4693",
      "tree": "4bb9441b6e8273c1dd319dfdbd8611c81c48a9bd",
      "parents": [
        "22dd9986758bd8ae6e786f36e5d7ba93ca0ded5e"
      ],
      "author": {
        "name": "Nikhil Kak",
        "email": "nkak@vmware.com",
        "time": "Tue Feb 20 11:11:36 2024 -0800"
      },
      "committer": {
        "name": "kaknikhil",
        "email": "nkak@vmware.com",
        "time": "Fri Mar 01 16:22:48 2024 -0800"
      },
      "message": "PMML: Improve multinom dev-check tests\n\nJIRA: MADLIB-1517\n\nThis commit adds a few more multinom pmml tests including grouping and no\nintercept\n"
    },
    {
      "commit": "22dd9986758bd8ae6e786f36e5d7ba93ca0ded5e",
      "tree": "40310c24da275ddc5b7a70d523822249b561f168",
      "parents": [
        "5e5849e851be529ee471c4ea45b2269f54cf8640"
      ],
      "author": {
        "name": "Nikhil Kak",
        "email": "nkak@vmware.com",
        "time": "Mon Feb 19 19:04:59 2024 -0800"
      },
      "committer": {
        "name": "kaknikhil",
        "email": "nkak@vmware.com",
        "time": "Mon Feb 26 11:28:42 2024 -0800"
      },
      "message": "PMML: Fix segfault in postgres dev-check\n\nJIRA: MADLIB-1517\n\nOne of the dev-check queries for the glm pmml would segfault in postgres\nprobably because of this warning\n```\nWARNING:  Hessian or gradient is not finite.\n```\nModified the query to fix the segfault\n\nFor reference, here is the failure:\n```\nDROP TABLE IF EXISTS glm_model, glm_model_summary;\nDROP TABLE\nSELECT glm(\n    \u0027abalone\u0027,\n    \u0027glm_model\u0027,\n    \u0027rings \u003c 10\u0027,\n    \u0027ARRAY[1, length, diameter, height, whole, shucked, viscera, shell]\u0027,\n    \u0027family\u003dbinomial, link\u003dlogit\u0027, \u0027sex\u0027, \u0027max_iter\u003d1000, tolerance\u003d1e-16\u0027\n);\nWARNING:  Hessian or gradient is not finite.\nserver closed the connection unexpectedly\n\tThis probably means the server terminated abnormally\n\tbefore or while processing the request.\nThe connection to the server was lost. Attempting reset: Failed.\n```\n"
    },
    {
      "commit": "5e5849e851be529ee471c4ea45b2269f54cf8640",
      "tree": "a6e16f4fa44d5cf98cdce69913a794780d2c86db",
      "parents": [
        "8a85d8e054da4014c36137097e00315c43f930e3"
      ],
      "author": {
        "name": "Nikhil Kak",
        "email": "nkak@vmware.com",
        "time": "Fri Feb 16 16:10:09 2024 -0800"
      },
      "committer": {
        "name": "kaknikhil",
        "email": "nkak@vmware.com",
        "time": "Mon Feb 26 11:28:42 2024 -0800"
      },
      "message": "PMML: Install pyyaml and java for jenkins\n\nJIRA: MADLIB-1517\n\nPrevious few commits created dev-check tests that need the pypmml module to\ntest madlib\u0027s pmml function. We need to install this in our jenkins build so\nthat the PR pipeline can run the pmml tests. This commit installs the pypmml\nmodule and also installs java which is needed by the pypmml module itself.\n"
    },
    {
      "commit": "8a85d8e054da4014c36137097e00315c43f930e3",
      "tree": "005758f44dbb4f8043f83d00ab9757729b1dd44b",
      "parents": [
        "0cd28f9733927d63beaefc9488db7f8bfdb3bd80"
      ],
      "author": {
        "name": "Nikhil Kak",
        "email": "nkak@vmware.com",
        "time": "Fri Feb 16 13:34:15 2024 -0800"
      },
      "committer": {
        "name": "kaknikhil",
        "email": "nkak@vmware.com",
        "time": "Mon Feb 26 11:28:42 2024 -0800"
      },
      "message": "PMML: Add dev-check tests for pmml export\n\nJIRA: MADLIB-1517\n\nThis commit introduces a new function named `test_pmml_output` which can be\nused to test the output of the pmml function. It compares the output of\nmadlib\u0027s predict function with pypmml\u0027s predict function\n\n* Added a pmml setup file that contains all the common datasets and functions\n  used by the pmml dev-check tests\n* Added exhaustive tests to glm, linear and logistic. A future commit will add\n  more tests to other pmml modules\n"
    },
    {
      "commit": "0cd28f9733927d63beaefc9488db7f8bfdb3bd80",
      "tree": "dfabe41a7b32e3c12ea977df5783dba653bcb12c",
      "parents": [
        "c82a5c99bf9e35b448ab219a50f552ed4982a611"
      ],
      "author": {
        "name": "Nikhil Kak",
        "email": "nkak@vmware.com",
        "time": "Thu Feb 15 17:53:56 2024 -0800"
      },
      "committer": {
        "name": "kaknikhil",
        "email": "nkak@vmware.com",
        "time": "Mon Feb 26 11:28:42 2024 -0800"
      },
      "message": "PMML: Do not include intercept as a predictor\n\nJIRA: MADLIB-1517\n\nNote that this commit only fixes GLM, logisitic and linear. A future commit\nwill fix other pmml modules.\n\nContext :\n--------------------------------------------------------\nMADlib\u0027s way of passing intercept to regression models is a bit unusual.\nUsually intercept is a boolean which indicates whether the model needs to be\nfit with intercept or not. MADlib makes the user pass an integer (1 means use\nan intercept and no value means don\u0027t fit with intercept) along with the other\nindependent variables and uses that directly for computation. For e.g.\nARRAY[1,x1,x2,...] indicates use an intercept whereas ARRAY[x1,x2,...] means\ndon\u0027t use an intercept\n\nProblem:\n--------------------------------------------------------\n* So essentially all the regression algorithms treat the intercept value \"1\" as\n  just another independent variable(it\u0027s always the first one though).\n* Because of this implicit assumption, users need to specifically inject a\n  predictor named \"1\" with a value of 1 for all the input rows. This can be very\n  inconvenient specially when using pmml to predict a stream of data or some\n  other preprocessed form of data.\n\nFix:\n--------------------------------------------------------\n* Once the model is trained, the only way to know if the model was fit with\n  intercept is to look at the `independent_varname` field in the summary table.\n* If the value contains ARRAY[1, x1, x2..], then an intercept was used.\n* Since this intercept is just another independent variable, there aren\u0027t any\n  explicit references or logic to handle intercept in our python or c++ code for\n  training or predict.\n* Because of this assumption, the pmml code also considers all of\n  \"ARRAY[1,x1,x2,...]\" as independent variables and hence the output pmml\n  contains \"1\" as an input predictor.\n* We can just remove all references to the column \"1\" in the pmml file. We will\n  still keep the \"p0\" variable which is explicitly marked as an intercept and\n  will store the intercept\u0027s coefficient\n* The pmml module gets the \u0027X\u0027 and \u0027Y\u0027 values from the summary table and then\n  parses it to create a list of all the independent predictors so that it can\n  be written to the pmml file\n* It uses regex to match the expression ARRAY[1,x1,x2]/ARRAY[x1,x2] and then\n  returns either [\u00271\u0027,\u0027x1\u0027,\u0027x2\u0027] or [\u0027x1\u0027,x2\u0027]\n* Our goal with the pmml code is to not treat the intercept \"1\" as an\n  independent predictor but just as an intercept\n* The commit fixes this by changing the regex and using the output to determine\n  if an intercept was passed so that both expressions\n  ARRAY[1,x1,x2]/ARRAY[x1,x2] return [\u0027x1\u0027, \u0027x2\u0027]\n* Also had to make changes to the various pmml builder classes to treat\n  intercept\u0027s coefficient differently than the feature coefficients\n* Note that this commit only fixes GLM, logisitic and linear. A future commit\n  will fix other pmml modules.\n\nBefore the fix:\n```\n\u003c?xml version\u003d\"1.0\" standalone\u003d\"yes\"?\u003e\n\u003cPMML version\u003d\"4.1\" xmlns\u003d\"http://www.dmg.org/PMML-4_1\"\u003e\n \u003cHeader copyright\u003d\"Copyright (c) 2024 nkak\"\u003e\n   \u003cExtension extender\u003d\"MADlib\" name\u003d\"user\" value\u003d\"nkak\"/\u003e\n   \u003cApplication name\u003d\"MADlib\" version\u003d\"2.1.0\"/\u003e\n   \u003cTimestamp\u003e2024-02-16 12:19:58.798139 PDT\u003c/Timestamp\u003e\n \u003c/Header\u003e\n \u003cDataDictionary numberOfFields\u003d\"4\"\u003e\n   \u003cDataField name\u003d\"second_attack_pmml_prediction\" optype\u003d\"categorical\" dataType\u003d\"boolean\"\u003e\n     \u003cValue value\u003d\"True\"/\u003e\n     \u003cValue value\u003d\"False\"/\u003e\n   \u003c/DataField\u003e\n   \u003cDataField name\u003d\"1\" optype\u003d\"continuous\" dataType\u003d\"double\"/\u003e\n   \u003cDataField name\u003d\"treatment\" optype\u003d\"continuous\" dataType\u003d\"double\"/\u003e\n   \u003cDataField name\u003d\"trait_anxiety\" optype\u003d\"continuous\" dataType\u003d\"double\"/\u003e\n \u003c/DataDictionary\u003e\n \u003cRegressionModel functionName\u003d\"classification\" normalizationMethod\u003d\"softmax\"\u003e\n   \u003cMiningSchema\u003e\n     \u003cMiningField name\u003d\"second_attack_pmml_prediction\" usageType\u003d\"predicted\"/\u003e\n     \u003cMiningField name\u003d\"1\"/\u003e\n     \u003cMiningField name\u003d\"treatment\"/\u003e\n     \u003cMiningField name\u003d\"trait_anxiety\"/\u003e\n   \u003c/MiningSchema\u003e\n   \u003cRegressionTable intercept\u003d\"0.0\" targetCategory\u003d\"True\"\u003e\n     \u003cNumericPredictor name\u003d\"1\" coefficient\u003d\"-6.363469941781864\"/\u003e\n     \u003cNumericPredictor name\u003d\"treatment\" coefficient\u003d\"-1.0241060523932668\"/\u003e\n     \u003cNumericPredictor name\u003d\"trait_anxiety\" coefficient\u003d\"0.11904491666860616\"/\u003e\n   \u003c/RegressionTable\u003e\n   \u003cRegressionTable intercept\u003d\"0.0\" targetCategory\u003d\"False\"/\u003e\n \u003c/RegressionModel\u003e\n\u003c/PMML\u003e\n```\n\nAfter the fix:\n```\n\u003c?xml version\u003d\"1.0\" standalone\u003d\"yes\"?\u003e\n\u003cPMML version\u003d\"4.1\" xmlns\u003d\"http://www.dmg.org/PMML-4_1\"\u003e\n  \u003cHeader copyright\u003d\"Copyright (c) 2024 nkak\"\u003e\n    \u003cExtension extender\u003d\"MADlib\" name\u003d\"user\" value\u003d\"nkak\"/\u003e\n    \u003cApplication name\u003d\"MADlib\" version\u003d\"2.1.0\"/\u003e\n    \u003cTimestamp\u003e2024-02-16 13:37:15.367609 PDT\u003c/Timestamp\u003e\n  \u003c/Header\u003e\n  \u003cDataDictionary numberOfFields\u003d\"3\"\u003e\n    \u003cDataField name\u003d\"second_attack_pmml_prediction\" optype\u003d\"categorical\" dataType\u003d\"boolean\"\u003e\n      \u003cValue value\u003d\"True\"/\u003e\n      \u003cValue value\u003d\"False\"/\u003e\n    \u003c/DataField\u003e\n    \u003cDataField name\u003d\"treatment\" optype\u003d\"continuous\" dataType\u003d\"double\"/\u003e\n    \u003cDataField name\u003d\"trait_anxiety\" optype\u003d\"continuous\" dataType\u003d\"double\"/\u003e\n  \u003c/DataDictionary\u003e\n  \u003cRegressionModel functionName\u003d\"classification\" normalizationMethod\u003d\"softmax\"\u003e\n    \u003cMiningSchema\u003e\n      \u003cMiningField name\u003d\"second_attack_pmml_prediction\" usageType\u003d\"predicted\"/\u003e\n      \u003cMiningField name\u003d\"treatment\"/\u003e\n      \u003cMiningField name\u003d\"trait_anxiety\"/\u003e\n    \u003c/MiningSchema\u003e\n    \u003cRegressionTable intercept\u003d\"-6.36346994178186\" targetCategory\u003d\"True\"\u003e\n      \u003cNumericPredictor name\u003d\"treatment\" coefficient\u003d\"-1.0241060523932697\"/\u003e\n      \u003cNumericPredictor name\u003d\"trait_anxiety\" coefficient\u003d\"0.11904491666860609\"/\u003e\n    \u003c/RegressionTable\u003e\n    \u003cRegressionTable intercept\u003d\"0.0\" targetCategory\u003d\"False\"/\u003e\n  \u003c/RegressionModel\u003e\n\u003c/PMML\u003e\n\n```\n\nRisks and limitations:\n--------------------------------------------------------\n1. Straying away from the intrinsic intercept assumption only for the pmml code:\n  * As we have established already, the intercept is not treated any different\n    from an independent variable.\n  * To fix the pmml file to not include the intercept as an independent variable,\n    we will need to break this intrinsic assumption.\n  * If the pmml code breaks this assumption, it\u0027s possible that there might be\n    some unexpected side effects or errors that even with exhaustive testing may\n    not be uncovered. For e.g. the pmml code relies on the len of the coefficient\n    to make some decisions about naming and such. (See formula.py for an example)\n    which might have some weird edge cases\n\n  We might be fine with this risk for now and if something breaks in the future,\n  we can deal with it later. Biggest risk is that something fundamental breaks in\n  the future that might make us revert this new logic. But the odds of that are\n  pretty low\n\n2. If the user passed a non array expression for the independent variable\n  Consider the following example\n\n  ```\n  -- Create a table where the x variable is an array of the independent variables to train on\n  CREATE TABLE warpbreaks_dummy_simple_xcol AS SELECT breaks AS y, ARRAY[1,\"wool_B\",\"tension_M\", \"tension_H\"] AS x_a from warpbreaks_dummy;\n\n  -- Now use the column \u0027x_a\u0027 created in the previous step.\n  SELECT madlib.glm(\u0027warpbreaks_dummy_simple_xcol\u0027, \u0027glm_warpbreaks_intercept_1_simple_xcol\u0027, \u0027y\u0027 , \u0027x_a\u0027 , \u0027family\u003dpoisson, link\u003dlog\u0027);\n  ```\n  Now there\u0027s no way for us know if this model was fit with an intercept or not.\n  The only way to know is to check the value of \"independent_varname\" in the\n  summary table which would be \"x_A\" in this case which won\u0027t tell us anything\n  about the intercept.\n\n  Ideally, we would like to change the fit functions to take a boolen for the\n  intercept arg but that will too big of a change and hence is out of scope of this commit.\n\n  The easiest fix for this problem for now is that we are going to assume that\n  all non array expressions always include the intercept. Note that this\n  assumption only applies to the pmml module\n\n3. Using the name_spec arg of the pmml function\n  * The pmml function accepts an optional arg named \"name_spec\" which is used to explicitly name the input and output variables in the pmml file.\n  * The user will now need to remove the \"1\" from this expression\n     For e.g. `SELECT madlib.pmml(\u0027patients_logregr\u0027, \u0027attack~1+anxiety+treatment\u0027);` will have to be rewritten as\n      `SELECT madlib.pmml(\u0027patients_logregr\u0027, \u0027attack~anxiety+treatment\u0027);`\n  * We will need to remove this from the pmml user docs which will be done in a separate PR.\n\n4. If the intercept is not the first one in the independent_varname array expression\n\n   Consider the following examples\n   ```\n   SELECT madlib.linregr_train(\u0027houses\u0027, \u0027linregr_model\u0027, \u0027price\u0027, \u0027array[bedroom, 1, bath, size]\u0027);\n   SELECT madlib.linregr_predict(coef, ARRAY[bedroom, 1, bath, size]) FROM linregr_model, houses;\n   ```\n   or\n   ```\n   SELECT madlib.linregr_train(\u0027houses\u0027, \u0027linregr_model\u0027, \u0027price\u0027, \u0027array[bedroom, bath, size, 1]\u0027);\n   SELECT madlib.linregr_predict(coef, ARRAY[bedroom, bath, size, 1]) FROM linregr_model, houses;\n   ```\n   Both of these are allowed which makes it really hard for the pmml code to figure out if the intercept was used or not.\n\n   Solution 1:\n   * Always assume that the intercept arg \"1\" will be at the start of the expression.\n   * All our regression user docs usually specify the intercept in the beginning so most of our users will be used to that format.\n   * There is a small risk that when the intercept is not in the beginning of the expression, the exported pmml will assume that \"1\" is a normal predictor and not an intercept. This is no different than how\n     it\u0027s treated right now before we decided to fix it. Users will just need to provide a column named \"1\" when predicting using that pmml\n\n   Solution 2:\n   * pmml code will need to get smarter and parse the array expression to figure out the position of the intercept and then accordingly get the intercept coefficient from the coef array\n   * This will require a lot of work and might still not be foolproof since we also allow passing random integers in the independent variable expression.(see previous issue)\n   * Even if we ignore the integer issue, we will need to make quite a few changes to the pmml code which can be error prone and hard to maintain.\n\n   Decided to go with Solution 1 for ease of use and maintainability\n"
    },
    {
      "commit": "c82a5c99bf9e35b448ab219a50f552ed4982a611",
      "tree": "483370395bff794a242b7ba2d856e31b0c5c2cd9",
      "parents": [
        "3c568245cc55242bed549a66f47defdf1653dbca"
      ],
      "author": {
        "name": "Nikhil Kak",
        "email": "nkak@vmware.com",
        "time": "Fri Feb 16 15:57:28 2024 -0800"
      },
      "committer": {
        "name": "kaknikhil",
        "email": "nkak@vmware.com",
        "time": "Mon Feb 26 11:28:42 2024 -0800"
      },
      "message": "Update NOTICE for 2024\n"
    },
    {
      "commit": "3c568245cc55242bed549a66f47defdf1653dbca",
      "tree": "e0d50b8d78371dc183678ede47887c5e903e30bf",
      "parents": [
        "70c548c102b1e5d9013294284affcde00a6412fb"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Tue Sep 05 20:04:54 2023 +0300"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Tue Sep 05 20:04:54 2023 +0300"
      },
      "message": "Update release_notes with RC2 changes\n"
    },
    {
      "commit": "70c548c102b1e5d9013294284affcde00a6412fb",
      "tree": "836587abc3eecf11dcea8ac883fc0264687d9375",
      "parents": [
        "395a81923864444c3a641187441bfdd0e022225f"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Tue Sep 05 10:19:59 2023 +0300"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Tue Sep 05 19:50:31 2023 +0300"
      },
      "message": "Remove PyXB mentions from code, docs and licenses\n"
    },
    {
      "commit": "395a81923864444c3a641187441bfdd0e022225f",
      "tree": "8d17d7c38d6c745220154af52c260e278360b1c0",
      "parents": [
        "b734a8f672bef782d5415419934ae6adcb11d148"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Sat Sep 02 09:02:58 2023 +0300"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Tue Sep 05 19:50:31 2023 +0300"
      },
      "message": "Fix anyarray -\u003e anycompatiblearray change for PG14\n"
    },
    {
      "commit": "b734a8f672bef782d5415419934ae6adcb11d148",
      "tree": "ad8503a69aba6ee166ebddb5024c3ab2139463d9",
      "parents": [
        "40dfa0e3810b2f8d070941ba8053cc7b7886f169"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Fri Sep 01 20:39:20 2023 +0300"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Tue Sep 05 19:50:31 2023 +0300"
      },
      "message": "Build: Remove PyXB from cmake and use pyxb-x\n\nPyXB is abandoned and broken for py3.10+. pyxb-x is a patched version of\npyxb that still has backwards compatibility. Instead of trying to pack\npyxb with the MADlib, we just rely on yum like many other libraries.\n"
    },
    {
      "commit": "40dfa0e3810b2f8d070941ba8053cc7b7886f169",
      "tree": "1e0cf97f1daf5d9f3a6a09b493aae6711798bca2",
      "parents": [
        "845bcb5c8e6f0a71b609e50905683bd4d84ed70c"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Fri Sep 01 20:34:48 2023 +0300"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Tue Sep 05 19:50:31 2023 +0300"
      },
      "message": "DL: Update get_state_to_return to always return bytea\n\nget_state_to_return returned bytea in some cases and a single float in\nothers. The float value is not actually used so it did not break the\nprocess. However, this still causes python instability and errors in\nsome cases. This commit fixes the issue by converting the float value to\nbytea just like the rest of the options.\n"
    },
    {
      "commit": "845bcb5c8e6f0a71b609e50905683bd4d84ed70c",
      "tree": "a811f2c565c89c01ff207b6b5ef5917d32ed45d0",
      "parents": [
        "6d621d5291b5381d1cb048739e8328cc5c527dea"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Thu Aug 31 13:09:12 2023 +0300"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Tue Sep 05 19:50:31 2023 +0300"
      },
      "message": "Build: Use PG15 in jenkins build\n"
    },
    {
      "commit": "6d621d5291b5381d1cb048739e8328cc5c527dea",
      "tree": "64bf16d004cfddbda02abcbe11ec8a6b608d9b7b",
      "parents": [
        "279d25e7261ef0462c0ad681d3efe9caf76a8ee7"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Thu Aug 24 23:33:21 2023 +0300"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Mon Aug 28 08:54:12 2023 +0300"
      },
      "message": "Build: Add post- scripts for gppkg and gp7 version checks in madpack\n"
    },
    {
      "commit": "279d25e7261ef0462c0ad681d3efe9caf76a8ee7",
      "tree": "786f0a9297fc7d9eebc0b980a9406829d03075f0",
      "parents": [
        "c0e6e6d834187b2bb4a25386a5877acbe8843620"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Thu Aug 24 22:53:05 2023 +0300"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Mon Aug 28 08:54:12 2023 +0300"
      },
      "message": "Add 2.0.0 to 2.1.0 changelist\n"
    },
    {
      "commit": "c0e6e6d834187b2bb4a25386a5877acbe8843620",
      "tree": "c8bd69134fb01dc308773428a8e0e5584122653a",
      "parents": [
        "66f3fe9e5f55fb91be72832d71b5f7813fd1103f"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Wed Aug 23 21:38:08 2023 +0300"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Thu Aug 24 15:20:51 2023 +0300"
      },
      "message": "Update version number to 2.1.0 and add release notes\n"
    },
    {
      "commit": "66f3fe9e5f55fb91be72832d71b5f7813fd1103f",
      "tree": "df61f831813b4da96625375921b6b60658f9e20c",
      "parents": [
        "96dd1d5b705c7d51f91cb5a34a4df44cdc8c3960"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Tue Aug 22 16:58:29 2023 +0300"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Wed Aug 23 21:34:26 2023 +0300"
      },
      "message": "Madpack: Add the actual path of $libdir\n\nMADlib has to set dynamic_library_path but gpconfig do not work with the\ndefault value $libdir. We get the full libdir path and use it instead of\nthe variable.\n"
    },
    {
      "commit": "96dd1d5b705c7d51f91cb5a34a4df44cdc8c3960",
      "tree": "9481369dce1ef484cd074aed72331f23e37d1427",
      "parents": [
        "7a16084c03343e1f7ee8aa22708ac94d8b52cd68"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Fri Aug 18 15:34:03 2023 +0300"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Mon Aug 21 21:07:16 2023 +0300"
      },
      "message": "Build: Add Ubuntu flag for PyXB installation\n\nPyXB uses distutils which is being phased out. The ubuntu images fail to\ninstall it the old way so this commit adds a flag to identify the OS as\nUbuntu during cmake and use pip install as needed.\n"
    },
    {
      "commit": "7a16084c03343e1f7ee8aa22708ac94d8b52cd68",
      "tree": "430e570bfd5d042853d61045c77d41fa27a15108",
      "parents": [
        "f91813bedb275bb042968a5e38d4293bbad6b325"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Wed Jul 12 08:56:01 2023 -0400"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Mon Aug 21 21:07:16 2023 +0300"
      },
      "message": "Various: Reduce SERIAL usage\n\nSince SERIAL columns do not guarantee a sequence of numbers with no\ngaps, this commit removes its usage from load_keras_model.\nThe issue was unearthed by a change in the GPDB code setting the cache\nfor sequences to 20 and causing the numbers to jump unexpectedly.\nA similar issue is found in assoc_rules. There we wanted to avoid using\nrow_number so we set the cache to 1 to get the previous behavior.\n"
    },
    {
      "commit": "f91813bedb275bb042968a5e38d4293bbad6b325",
      "tree": "fe4c3a18f71b96771deea9c5feb2ac477b3996fb",
      "parents": [
        "8e5c222b91056ff5989ab71dbccf046a772339bf"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Thu Jun 15 21:02:08 2023 -0400"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Fri Jul 07 17:28:42 2023 -0400"
      },
      "message": "Build: Add support for PG15\n"
    },
    {
      "commit": "8e5c222b91056ff5989ab71dbccf046a772339bf",
      "tree": "853ee01cb945c745a5febb41d123c4ccf0ea8a88",
      "parents": [
        "92739b20167808c017c6ac93b857b309a6b80624"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Wed Jun 28 14:01:02 2023 -0400"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Wed Jun 28 19:07:33 2023 -0400"
      },
      "message": "Update version number to 2.1.0-dev\n"
    },
    {
      "commit": "92739b20167808c017c6ac93b857b309a6b80624",
      "tree": "4f5ad8522965e20ee153bb2f1643ebb5f94fb5bd",
      "parents": [
        "70fac35b0158b35017ae93f8abafda62bbb3c224"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Fri Jun 09 10:40:03 2023 -0400"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Mon Jun 12 13:24:23 2023 -0400"
      },
      "message": "Update version number and add release notes\n"
    },
    {
      "commit": "70fac35b0158b35017ae93f8abafda62bbb3c224",
      "tree": "5c311022991fe3deb61321425b3cd02dad6be0db",
      "parents": [
        "2e5a7ba01a03a27f5a8bbe274642e58c0ac194b7"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Fri May 19 09:08:33 2023 -0400"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Wed May 24 08:38:07 2023 -0400"
      },
      "message": "Bayes: Disable ORCA before accessing views\n\nStarting with GP7 Beta3, accesing bayes created views started crashing\nthe db with ORCA. This commit disabled the optimizer with a FIXME\ncomment to investigate in the future.\n"
    },
    {
      "commit": "2e5a7ba01a03a27f5a8bbe274642e58c0ac194b7",
      "tree": "a4927cf00314d3dfc4852a22467ae4efbe271fd9",
      "parents": [
        "769188e927e9e5564b4aa9bfd5e77bcf36dda16f"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Tue May 16 18:09:58 2023 -0400"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Wed May 24 08:38:07 2023 -0400"
      },
      "message": "Build: Add gppkg v2 support for GP7\n\nThis commit updates the packaging process to use the new gppkg_v2 for\nGP7. Currently we unpack the existing RPM to create it but at some point\nwe should overhaul the system to actually collect the files and don\u0027t\neven create the intermediate RPM.\n"
    },
    {
      "commit": "769188e927e9e5564b4aa9bfd5e77bcf36dda16f",
      "tree": "4094b2f5ecd5054f74efe4c63b5d979b67e14423",
      "parents": [
        "03fba10fc0d115afcf25c00e1b97ef747676396a"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Wed May 10 14:41:48 2023 -0400"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Mon May 22 11:25:26 2023 -0400"
      },
      "message": "Build: Add PG13 on Ubuntu 20 support for Jenkins\n\nThis commit fixes the Jenkins pipeline for PG13. It disables some tests\n(with FIXME tag) since they had docker specific memory issues.\nPMML is also fixed for py3.\nFinally, it adds PG14 and PG15 support for future convenience.\n"
    },
    {
      "commit": "03fba10fc0d115afcf25c00e1b97ef747676396a",
      "tree": "15536d4827c9b80f6e40003d0e50f6979ca3e776",
      "parents": [
        "533d5e73f8444009026c9b06908edc69c51af189"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Wed Mar 29 23:01:58 2023 +0300"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Mon Apr 03 17:06:22 2023 +0300"
      },
      "message": "Update jenkins to use PG13\n\nThe current jenkins build is failing because of a server crash.\nAdded a FIXME in the build script to look at this problem and find an\nefficient way to debug in a different JIRA.\n"
    },
    {
      "commit": "533d5e73f8444009026c9b06908edc69c51af189",
      "tree": "f4d2134921c662dc5540bd1ee1e375940f4f4114",
      "parents": [
        "8ce0eef32979cd8a6d3b1137aad9095b56b0c134"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Wed Mar 29 19:12:38 2023 +0300"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Mon Apr 03 17:06:22 2023 +0300"
      },
      "message": "Address review comments\n"
    },
    {
      "commit": "8ce0eef32979cd8a6d3b1137aad9095b56b0c134",
      "tree": "e1ead74d26b120e4a8c151b133f43e40b17ddcbf",
      "parents": [
        "7e6d0563fd83a74cfba31b5d878fa1dd2265e57b"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Fri Mar 17 19:14:19 2023 +0300"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Mon Apr 03 17:06:22 2023 +0300"
      },
      "message": "Add Postgres 13 with python3 support\n"
    },
    {
      "commit": "7e6d0563fd83a74cfba31b5d878fa1dd2265e57b",
      "tree": "b12448da73ee40418e77fe260db36639e9dd84e2",
      "parents": [
        "149363a17ce9f3675d617b6abeb861542309d2ea"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Fri Mar 17 19:11:51 2023 +0300"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Mon Apr 03 17:06:22 2023 +0300"
      },
      "message": "Remove redundant cmake files\n"
    },
    {
      "commit": "149363a17ce9f3675d617b6abeb861542309d2ea",
      "tree": "50d5679c208b868bebcb5ae19e782a0663bc0d05",
      "parents": [
        "5442a64464504e5247173c7b39399a6dc2bbecb1"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Mon Nov 21 16:43:44 2022 +0300"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Mon Apr 03 17:06:22 2023 +0300"
      },
      "message": "Various fixes: Add gpdb7 specific checks\n"
    },
    {
      "commit": "5442a64464504e5247173c7b39399a6dc2bbecb1",
      "tree": "f25d9fe1a9fc0cfd5085e187e3a92b79db44bd22",
      "parents": [
        "76fe872b30b2e63d0ebf6ae83ab7d73ee650dcf6"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Wed Oct 19 17:22:11 2022 -0400"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Mon Apr 03 17:06:22 2023 +0300"
      },
      "message": "Add python3 support for GPDB6\n\nGPDB6 has python3.9 support via an additional package. This commit\nupdates the gpdb7/python3.6 branch to support it.\n\nPython 3.9 necessitates updates to xgboost and tensorflow. Tensorflow\n2.10 requires a number of changes in the deep learning module.\n"
    },
    {
      "commit": "76fe872b30b2e63d0ebf6ae83ab7d73ee650dcf6",
      "tree": "cf98029bde2666cc4e3a09ba34ea7131136739cb",
      "parents": [
        "3eaae9971d1b9f852ddec918ee77acfac5d402c7"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Fri Apr 29 15:42:52 2022 +0300"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Mon Apr 03 17:06:22 2023 +0300"
      },
      "message": "Add python3 support\n\nThis commit changes a large number of files to support python3. It has\nbeen tested on GPDB7 development branch.\n"
    },
    {
      "commit": "3eaae9971d1b9f852ddec918ee77acfac5d402c7",
      "tree": "469b1bb0f3d4fc5930ef243de5779cd1c4b688ba",
      "parents": [
        "f06638ee2b173ec793dab66fd7cdbf1e68bd05c8"
      ],
      "author": {
        "name": "soarpenguin",
        "email": "soarpenguin@gmail.com",
        "time": "Sat Aug 07 09:17:40 2021 +0000"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Mon Apr 03 17:06:22 2023 +0300"
      },
      "message": "update: fix madlib support py3.\n"
    },
    {
      "commit": "f06638ee2b173ec793dab66fd7cdbf1e68bd05c8",
      "tree": "53ed63d85d6c765373815850f5a9c40919248aa0",
      "parents": [
        "68adc8b4af65efc4a660442230fd1e7a38cd90f0"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Fri Mar 10 16:03:11 2023 +0300"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Fri Mar 10 16:03:11 2023 +0300"
      },
      "message": "Update version number to 2.0.0-dev\n"
    },
    {
      "commit": "68adc8b4af65efc4a660442230fd1e7a38cd90f0",
      "tree": "a27c61e731c8191d9765181a002215833c8f2fcb",
      "parents": [
        "a7d182c872326133eda54b990094ad75d03e9d5e"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Thu Feb 23 12:25:16 2023 -0500"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Thu Feb 23 12:28:16 2023 -0500"
      },
      "message": "SSSP: Drop temp views\n"
    },
    {
      "commit": "a7d182c872326133eda54b990094ad75d03e9d5e",
      "tree": "c0d09cd1ad8a36eec2a8d0a13e1cec9a0838e865",
      "parents": [
        "f01d433886e9b943a22db3a24777e0ee113b0d79"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Wed Feb 22 13:33:11 2023 -0500"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Thu Feb 23 09:29:37 2023 -0500"
      },
      "message": "Build: Update version to 1.21.0 and add release notes\n"
    },
    {
      "commit": "f01d433886e9b943a22db3a24777e0ee113b0d79",
      "tree": "f1ff713ca925b8a8f8ba4da2e5f38d3e71f64b61",
      "parents": [
        "ba772ae58bd05edfe25e81d30c529cbcf6082c22"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Mon Feb 13 11:35:00 2023 -0500"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Wed Feb 22 20:41:12 2023 -0500"
      },
      "message": "WCC: Add warm start\n\nWCC creates a large number of subtransactions which may cause system\nperformance degredation in some cases. This cpmmit adds a parameter to\nlimit the number of iterations it runs as well as another one to\ncontinue from the incomplete state.\n"
    },
    {
      "commit": "ba772ae58bd05edfe25e81d30c529cbcf6082c22",
      "tree": "095373ee6a85b35457343a50ba7e41b7a803c263",
      "parents": [
        "d2c4d47894f8c28aa4dc81eb760383c27d7bafe5"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Fri Jan 06 14:29:09 2023 +0300"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Wed Feb 22 20:02:01 2023 -0500"
      },
      "message": "SSSP: Fix negative cycle check\n\nSSSP checks the iteration counter to identify negative cycles. If a\nshortest path has the same length as the number of vertices, then the\ncheck incorrectly identifies this as a negative cycle.\nThis commit fixes the issue and adds a relevant test.\n"
    },
    {
      "commit": "d2c4d47894f8c28aa4dc81eb760383c27d7bafe5",
      "tree": "71176784770db0e9b1ddc0dd9990c152468e0655",
      "parents": [
        "475aba5bec6e95f9023db1ba4b8edb1c00e878c0"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Mon Feb 13 11:32:09 2023 -0500"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Tue Feb 21 08:31:33 2023 -0500"
      },
      "message": "Disable TestIfNoUTF8BOM.py for all platforms\n\nTestIfNoUTF8BOM.py file was needed for Postgres Versions before 9. Since\nwe don\u0027t support them any more, this commit comments them out. If there\nare no problems, the commented lines and the file itself should be\nremoved in a later commit.\n"
    },
    {
      "commit": "475aba5bec6e95f9023db1ba4b8edb1c00e878c0",
      "tree": "114b729e6ab8598046bda8c0f496590217c4cc22",
      "parents": [
        "ac1517347fd37fce4691dfe73ab25095d04ae723"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Thu Feb 16 12:25:48 2023 -0500"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Thu Feb 16 12:26:18 2023 -0500"
      },
      "message": "Update NOTICE for 2023\n"
    },
    {
      "commit": "ac1517347fd37fce4691dfe73ab25095d04ae723",
      "tree": "727fcf4baeb8792dc17a0d7b8a77e78e3d136c8b",
      "parents": [
        "ee7c919d256322d66a3112a862d6123f713d7a45"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Wed Jan 04 15:15:44 2023 +0300"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Wed Jan 04 15:16:25 2023 +0300"
      },
      "message": "Fix TestIfNoUTF8BOM.py for newer versions of cmake\n"
    },
    {
      "commit": "ee7c919d256322d66a3112a862d6123f713d7a45",
      "tree": "388658ed6507963fe2961b6a5b8f079258015af5",
      "parents": [
        "92db2aef512bed83afec9b4cb3f253c8420967fe"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Thu Oct 13 15:06:45 2022 -0400"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Mon Nov 21 14:43:15 2022 +0300"
      },
      "message": "XGBoost: Various fixes\n\n- Fix class label verification in predict\n\nClass label is an optional argument for predict but the code checks to\nensure the column exists in the table. This commit fixes the issue and\nadds a test to run predict with default values and no optional\nparameters.\n\n- Add support for bigint and varchar id col\n\nXGBoost supports non-integer values as id columns (not features) in the\npython implementation. This commit alters the surrounding code to\naccomodate for such column types and adds/alters tests accordingly.\n\n- Add eval_metrics as a parameter\n\neval_metrics is used by XGBoost used monitoring the training result and\nearly stopping. We expose this parameter to the user and parse it to\npass it to the fit function (instead of init).\n"
    },
    {
      "commit": "92db2aef512bed83afec9b4cb3f253c8420967fe",
      "tree": "c6cdcc9247d2ef6c5872a78c398bedfcc454e30d",
      "parents": [
        "16e3e865509b9059e360f0215356f4393b2149dd"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Thu Aug 18 20:59:07 2022 +0300"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Mon Aug 22 18:22:07 2022 +0300"
      },
      "message": "Build: Fix photon rpm creation\n\nJIRA: MADLIB-1510\n\nCreating RPMs on Photon 3 systems missed rpm scripts. This commit\nensures that it is treated like a redhat system and necessary scripts\nare included in the metadata.\n"
    },
    {
      "commit": "16e3e865509b9059e360f0215356f4393b2149dd",
      "tree": "b31e90b5e2c987e7dcd5d34474eb53ad7953838b",
      "parents": [
        "195895cc68f85648cb7e2b5fdc92f6069e006dfb"
      ],
      "author": {
        "name": "Jianwen Dong",
        "email": "djianwen@vmware.com",
        "time": "Tue Aug 16 11:39:56 2022 -0700"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Wed Aug 17 21:38:21 2022 +0300"
      },
      "message": "Support Photon OS in the build system.\n\nJIRA: MADLIB-1510\n\nPreviously the building system only support Centos and Debian OS.\nIn order to run MADlib on Photon OS, we need the support to build it\nwith cmake on Photon OS.\n\nCo-authored-by: Jianwen Dong \u003cdjianwen@vmware.com\u003e\nCo-authored-by: Gaurab Dey \u003cgaurabd@vmware.com\u003e\n"
    },
    {
      "commit": "195895cc68f85648cb7e2b5fdc92f6069e006dfb",
      "tree": "e3a67afa5fa78c56f8e0c42cf45c4a1ae008b5bf",
      "parents": [
        "76a5db2f17b1338ef9acfba49fb40b458fe172f3"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Wed Jul 20 18:02:25 2022 +0300"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Fri Aug 05 16:18:14 2022 +0300"
      },
      "message": "Graph: Add multi column support for SSSP and APSP\n\nJIRA: MADLIB-1506\n\nThis is a follow up commit for 8a24663ccf2ee76ef538c416ed505485b740ff04,\napplying the same logic to add multi column vertex identification\nsupport to SSSP, APSP, and their respective get path functions.\n"
    },
    {
      "commit": "76a5db2f17b1338ef9acfba49fb40b458fe172f3",
      "tree": "6797d7128e1f03d56f0e82d41ad259825c301da9",
      "parents": [
        "918cc1466f4cea17c08a938199d8b74216dd7489"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Fri Aug 05 16:16:55 2022 +0300"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Fri Aug 05 16:17:40 2022 +0300"
      },
      "message": "XGBoost: Fix documentation example\n"
    },
    {
      "commit": "918cc1466f4cea17c08a938199d8b74216dd7489",
      "tree": "d889d34fd4bf060780586df6b748b93582ebec4f",
      "parents": [
        "86e9ad28f6c65b42423898d07de92380e28760b6"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Fri Aug 05 15:51:16 2022 +0300"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Fri Aug 05 16:16:11 2022 +0300"
      },
      "message": "Remove deep learning tests from jenkins\n"
    },
    {
      "commit": "86e9ad28f6c65b42423898d07de92380e28760b6",
      "tree": "fb8769464cb1fd206a70c47a59828d9a494f5e42",
      "parents": [
        "f9761f497bf29ae52f1406d4bd7b046d2726b6ab"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Fri Aug 05 11:45:08 2022 +0300"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Fri Aug 05 16:16:11 2022 +0300"
      },
      "message": "Update version number to 1.21.0-dev\n"
    },
    {
      "commit": "f9761f497bf29ae52f1406d4bd7b046d2726b6ab",
      "tree": "ee21baccb3501da5740d2f556570143976f91ba8",
      "parents": [
        "05d0a9bd64cb8d421af4ce96971fa9520ebc8707"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Wed Jul 06 15:57:35 2022 +0300"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Mon Jul 18 15:52:16 2022 +0300"
      },
      "message": "Update RELEASE_NOTES\n"
    },
    {
      "commit": "05d0a9bd64cb8d421af4ce96971fa9520ebc8707",
      "tree": "7f9156f8e3e1df082f572782f31e39101afc16bf",
      "parents": [
        "954d0f78b915b687763e6609e4ded35e5786cbaf"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Wed Jul 06 15:45:14 2022 +0300"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Mon Jul 18 15:52:16 2022 +0300"
      },
      "message": "Build: Update version number to 1.20.0\n"
    },
    {
      "commit": "954d0f78b915b687763e6609e4ded35e5786cbaf",
      "tree": "5da0a27a6a9a724a8ff5de5658145714ef2e3a6f",
      "parents": [
        "8a24663ccf2ee76ef538c416ed505485b740ff04"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Wed Jul 06 15:45:03 2022 +0300"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Mon Jul 18 15:52:16 2022 +0300"
      },
      "message": "Fix create_changelist udoc command\n"
    },
    {
      "commit": "8a24663ccf2ee76ef538c416ed505485b740ff04",
      "tree": "3c07848238bc066ffb4de77a3c913d213aa148e8",
      "parents": [
        "49bb11f04ebfb015f8b4f96947e0e8174929c841"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Mon Jun 13 20:03:05 2022 -0400"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Thu Jul 07 11:03:37 2022 +0300"
      },
      "message": "Graph: Add multi column support for Pagerank and WCC\n\nJIRA: MADLIB-1502, MADLIB-1503\n\nThis commit adds support for identifying vertices with multiple columns\nfor Pagerank and WCC modules. The rest of the graph modules will add the\nsame functionality in later commits.\n\nThe multi column support is done by creating views in which we collect\nthe given BIGINT columns into a single BIGINT array.\n\nCo-authored-by: Bhuvnesh Chaudhary \u003cbchaudhary@pivotal.io\u003e\n"
    },
    {
      "commit": "49bb11f04ebfb015f8b4f96947e0e8174929c841",
      "tree": "28903d05cd635b81ba25d018766d8e9e6de5ea58",
      "parents": [
        "45f533b2c79ea27fd101f00da8b2f0b5d2d9946c"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Thu Jun 09 18:46:42 2022 -0400"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Wed Jun 22 16:31:35 2022 -0400"
      },
      "message": "SVM: Disable ORCA to reduce planning time cost\n"
    },
    {
      "commit": "45f533b2c79ea27fd101f00da8b2f0b5d2d9946c",
      "tree": "92a4051cb98d4f90f8eec19b89ceaf111742ef62",
      "parents": [
        "f37a92924d21204a83566f66e2c814ad22b88588"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Thu Jun 09 16:59:54 2022 -0400"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Wed Jun 22 16:31:35 2022 -0400"
      },
      "message": "SVM: Fix dev-check test typo\n"
    },
    {
      "commit": "f37a92924d21204a83566f66e2c814ad22b88588",
      "tree": "ed96cb97e20ac2a10c1cbb037eebbe97390e2662",
      "parents": [
        "ac22af57a65a3e5082bedb3cb9bc064388ee79db"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Thu Jun 09 16:57:33 2022 -0400"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Wed Jun 22 16:31:35 2022 -0400"
      },
      "message": "Utilities: Reuse update plan in GroupIterationController\n\nThe group iteration controller prepares a plan and executes it during\nthe update phase. For some modules, this plan does not change between\niterations. With this commit, the plan gets saved and reused as needed.\n\nCo-authored-by: Bhuvnesh Chaudhary \u003cbchaudhary@pivotal.io\u003e\n"
    },
    {
      "commit": "ac22af57a65a3e5082bedb3cb9bc064388ee79db",
      "tree": "e5cb779dfc18133ca9e6aab5e562dcd27741bc0c",
      "parents": [
        "4a98bc47a73d729e5fbe9f1ccb19acb9bdb8f17d"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Fri May 06 15:05:28 2022 +0300"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Thu Jun 16 13:31:07 2022 -0400"
      },
      "message": "XGBoost: Add new module xgboost\n\nJIRA: MADLIB-1425, MADLIB-1490\n\nThis commit adds a new module to use XGBoost library via MADlib function\ncalls. Parallel grid search of parameters is supported.\n\nCo-authored-by: Bhuvnesh Chaudhary \u003cbchaudhary@pivotal.io\u003e\nCo-authored-by: Srivatsan Ramanujam \u003cvatsan.cs@gmail.com\u003e\nCo-authored-by: Ian Pytlarz \u003cipytlarz@purdue.edu\u003e\n"
    },
    {
      "commit": "4a98bc47a73d729e5fbe9f1ccb19acb9bdb8f17d",
      "tree": "cb5b7f712fe2aa8a7d56f2a79276668f37021957",
      "parents": [
        "9f1c50291846a6710261873d1475204c95a5ccd3"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Wed May 18 15:21:43 2022 +0300"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Thu Jun 16 13:31:07 2022 -0400"
      },
      "message": "Fix jenkins\n"
    },
    {
      "commit": "9f1c50291846a6710261873d1475204c95a5ccd3",
      "tree": "0b858a5c3f32b0695d82704081acd543a88ba81d",
      "parents": [
        "011bddf96b7a015ce0235f89a615e76fbedaee14"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Tue May 17 16:03:26 2022 +0300"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Thu Jun 16 13:31:07 2022 -0400"
      },
      "message": "Add users for docker commands\n"
    },
    {
      "commit": "011bddf96b7a015ce0235f89a615e76fbedaee14",
      "tree": "5b5102f39bf93816ddd49c4732775e827af33568",
      "parents": [
        "bdf8cae0fc632706329d70335a755408e49b90dc"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Thu May 12 16:39:08 2022 +0300"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Thu Jun 16 13:31:07 2022 -0400"
      },
      "message": "Update gitignore to exclude /cmake-build-debug\n"
    },
    {
      "commit": "bdf8cae0fc632706329d70335a755408e49b90dc",
      "tree": "22b0120b78a512fc7d75b5a868e7cd20ac031c03",
      "parents": [
        "5c70da3364fed4eee8601f77f41d10723ab984c2"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Mon May 02 11:35:03 2022 +0300"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Thu May 12 16:37:35 2022 +0300"
      },
      "message": "Doc: Update online examples for various model selection modules\n"
    },
    {
      "commit": "5c70da3364fed4eee8601f77f41d10723ab984c2",
      "tree": "7f2bb9c15faf4a7cdaf483e2eb6a026f4e0987d6",
      "parents": [
        "5cb3a39044836ff15f0ac64bd3c80c2abc357816"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Mon May 02 11:26:27 2022 +0300"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Thu May 12 16:37:35 2022 +0300"
      },
      "message": "Doc: Update online examples for various unsupervised learning modules\n"
    },
    {
      "commit": "5cb3a39044836ff15f0ac64bd3c80c2abc357816",
      "tree": "aea3eb35e2c1ecbd03b380738c6a2072eccac5ea",
      "parents": [
        "26eea8d6d27cb4b096cf9eb92c9ea586b62ace98"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Mon May 02 10:51:42 2022 +0300"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Thu May 12 16:37:35 2022 +0300"
      },
      "message": "Update docker_start.sh to find pg_ctl in PATH\n"
    },
    {
      "commit": "26eea8d6d27cb4b096cf9eb92c9ea586b62ace98",
      "tree": "3ee17e9d34ca29097be52c6a14413663cc516797",
      "parents": [
        "996b279edd6eb126f11aae7c80429afd063ff734"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Thu Apr 28 15:32:29 2022 +0300"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Thu May 12 16:37:35 2022 +0300"
      },
      "message": "Doc: Update online examples for various supervised learning modules\n"
    },
    {
      "commit": "996b279edd6eb126f11aae7c80429afd063ff734",
      "tree": "ff5c6060abc712639142dc5ab2e138907c84d95e",
      "parents": [
        "5174627afa9122c7d6aa17479fcd7a0e4dfd4cf4"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Fri Apr 29 17:28:02 2022 +0300"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Thu May 12 16:37:35 2022 +0300"
      },
      "message": "GLM: Disable ORCA to improve exec time\n"
    },
    {
      "commit": "5174627afa9122c7d6aa17479fcd7a0e4dfd4cf4",
      "tree": "c2565953d86729f4170eb8b95243040f3755dc94",
      "parents": [
        "09fbf5b15a37e4951e5706e7f13079cdf64b946b"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Wed Apr 27 14:15:22 2022 +0300"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Thu May 12 16:37:35 2022 +0300"
      },
      "message": "Elastic-net: Disable ORCA to improve exec time\n"
    },
    {
      "commit": "09fbf5b15a37e4951e5706e7f13079cdf64b946b",
      "tree": "4e47b93f861dbbc33345f7df91f44eb0bd7284e3",
      "parents": [
        "1be4525b074c3c96d8cd98052dc717aff38453f1"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Fri Mar 18 10:06:44 2022 -0400"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Mon Apr 25 12:51:25 2022 +0300"
      },
      "message": "Update version to 1.20.0-dev and add release date\n\nCo-authored-by: Bhuvnesh Chaudhary \u003cbchaudhary@pivotal.io\u003e\n"
    },
    {
      "commit": "1be4525b074c3c96d8cd98052dc717aff38453f1",
      "tree": "840a8c0d9caf35e51de8d87eae4251d2e157f88c",
      "parents": [
        "c87ddbaee8b2fb3af5c1d86bc0b51134a0e503a4"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Thu Mar 03 16:47:43 2022 -0500"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Thu Mar 03 17:03:31 2022 -0500"
      },
      "message": "Update RELEASE_NOTES\n\nCo-authored-by: Bhuvnesh Chaudhary \u003cbchaudhary@pivotal.io\u003e\n"
    },
    {
      "commit": "c87ddbaee8b2fb3af5c1d86bc0b51134a0e503a4",
      "tree": "40b06f52924a3c83f5605730e9fd599a5f21baef",
      "parents": [
        "44ca0ef433c0153880833ec789753210488d93b6"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Mon Feb 28 21:30:17 2022 -0500"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Thu Mar 03 17:00:23 2022 -0500"
      },
      "message": "GLM-multinom: Use non-temp tables in GroupIterationController\n\nThere is a potential issue with pg_temp not cleaning up correctly in\nthe case of a failure in GLM and multinom. This commit changes the\ndefault value to create the temp table used for state aggregation to\navoid the temporary tables.\n\nCo-authored-by: Bhuvnesh Chaudhary \u003cbchaudhary@pivotal.io\u003e\n"
    },
    {
      "commit": "44ca0ef433c0153880833ec789753210488d93b6",
      "tree": "7d0afcd8a27e905ccbe7fff1185c9f370aa7896f",
      "parents": [
        "496ce5d485a5a5a7673a445988cb7561fb4ab31c"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Wed Mar 02 14:40:54 2022 -0500"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Thu Mar 03 10:21:00 2022 -0500"
      },
      "message": "Fix jenkins docker parameter ordering\n"
    },
    {
      "commit": "496ce5d485a5a5a7673a445988cb7561fb4ab31c",
      "tree": "3aa5960d88d7422ec8b19ebd81df96c9c3ee5321",
      "parents": [
        "67a32465b219076fa2b46db537967c114f65801b"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Thu Feb 24 18:50:01 2022 -0500"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Thu Mar 03 10:21:00 2022 -0500"
      },
      "message": "Add error checking for pg_ctl, gpconfig and gpstop\n"
    },
    {
      "commit": "67a32465b219076fa2b46db537967c114f65801b",
      "tree": "b45ec92fc0a6b7877967906d7785776332b68c02",
      "parents": [
        "be297fe6beada0640f93317e8948834032718e32"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Thu Feb 24 18:45:22 2022 -0500"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Thu Mar 03 10:21:00 2022 -0500"
      },
      "message": "Revert \"Build: Add OSX tarball for release\"\n\nThis reverts commit ccb0314347d512e9c8bf3e5c01ee91a14f9eaad5.\nThe OSX tarball has permission and security related issues. Better to\nshelf this for a later release.\n"
    },
    {
      "commit": "be297fe6beada0640f93317e8948834032718e32",
      "tree": "e00939cc56fc34a8d9ae5fe340e3583ff58801d8",
      "parents": [
        "ccb0314347d512e9c8bf3e5c01ee91a14f9eaad5"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Thu Jan 06 15:34:12 2022 +0300"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Thu Jan 06 19:40:31 2022 +0300"
      },
      "message": "Add JIRA numbers to the RELEASE_NOTES\n"
    },
    {
      "commit": "ccb0314347d512e9c8bf3e5c01ee91a14f9eaad5",
      "tree": "9b5eb6977f09b39d032a667ae4c9d88ff544150f",
      "parents": [
        "03e522948f0ae41d2d61ec7d0019c27153172cd7"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Mon Dec 20 20:38:35 2021 -0500"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Thu Jan 06 19:40:31 2022 +0300"
      },
      "message": "Build: Add OSX tarball for release\n\nJIRA: MADLIB-1493\n\nSince the usual OSX package is not supported anymore, MADlib will\nrelease a compressed file that contains the build folder.\n\nTo ensure portability, this commit adds a script to update symlinks\nCurrent, bin, and doc as well as a Readme file.\n"
    },
    {
      "commit": "03e522948f0ae41d2d61ec7d0019c27153172cd7",
      "tree": "007d9486169d56f49a2c66303a12e3e3bd321581",
      "parents": [
        "8e81bb8a48868c510fac900a3b0d080eb4d43077"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Mon Dec 20 17:24:07 2021 -0500"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Thu Jan 06 19:40:31 2022 +0300"
      },
      "message": "Update RELEASE_NOTES and DBSCAN docs\n"
    },
    {
      "commit": "8e81bb8a48868c510fac900a3b0d080eb4d43077",
      "tree": "4ed89aed6d238ec09ed03c376fd01d180197df50",
      "parents": [
        "bf1893a57c650f17d927e12debb1291e2cffca5e"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Wed Jan 05 20:36:21 2022 +0300"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Thu Jan 06 19:40:31 2022 +0300"
      },
      "message": "Update Apache copyright date to 2022 in the NOTICE file\n"
    },
    {
      "commit": "bf1893a57c650f17d927e12debb1291e2cffca5e",
      "tree": "2a989ddfdeed78deac64bee85da6295a3d84c076",
      "parents": [
        "a2311d9288ddc74dcbf8850fff6120d53bea17cd"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Thu Dec 09 15:43:53 2021 -0500"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Wed Dec 15 12:19:39 2021 -0500"
      },
      "message": "Release: Update version numbers and add release notes\n"
    },
    {
      "commit": "a2311d9288ddc74dcbf8850fff6120d53bea17cd",
      "tree": "9215f68244c0330674e2c575e1a6347fef723151",
      "parents": [
        "95b3643f087e357dd9d3cbb3083dd543101159f2"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Fri Nov 05 18:48:55 2021 -0400"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Tue Dec 07 19:04:18 2021 -0500"
      },
      "message": "Build: Use dynamic_library_path for module pathname\n\nMADlib used to hard code the path for the module_pathname.\nThis has been changed to accomodate for major version upgrades of\nPostgres and Greenplum. However, some systems do not play well\nwith symlinks, so we revert that change and use the\ndynamic_library_path GUC to find the .so file instead.\n"
    },
    {
      "commit": "95b3643f087e357dd9d3cbb3083dd543101159f2",
      "tree": "c132eb83f4c2c46b4ea18c3f1bc3d53a70282d7b",
      "parents": [
        "ac0c46d1ab0d02800790338e8c33dd079bd92ac8"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Fri Nov 05 17:54:55 2021 -0400"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Tue Dec 07 19:04:18 2021 -0500"
      },
      "message": "Revert \"Build: Fix module pathname for gppkg\"\n\nThis reverts commit e1337aec283e3bcfb9de0a6f950f780664fa9940.\n\nThe reverted commit was creating a symlink for the lilbmadlib.so file.\nSince we decided to use the dynamic_library_path GUC, this link is\nand the associated changes are not needed any more.\n"
    },
    {
      "commit": "ac0c46d1ab0d02800790338e8c33dd079bd92ac8",
      "tree": "8e95cb525ad6aa79f80063ba727b11144aeea93e",
      "parents": [
        "dfbd29d0c6e21abeb9eef6a0968bd8fbc58b37ba"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Thu Nov 11 16:40:41 2021 -0500"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "time": "Thu Nov 11 16:40:41 2021 -0500"
      },
      "message": "@orhankislal WCC: Optimize subtx count and catalog entry frequency (#573)\n\n* WCC: Optimize subtx count and catalog entry frequency\r\n\r\nWCC had a high number of plpy.execute commands. Since each call\r\ncreates a new subtransaction, it was constantly hitting the\r\noverflow limit. This commit merges most of them.\r\n\r\nWCC also created and dropped temp tables in each iteration.\r\nThis bloats the catalog, so this commit uses a few actual\r\ntables and uses truncate/insert."
    },
    {
      "commit": "dfbd29d0c6e21abeb9eef6a0968bd8fbc58b37ba",
      "tree": "fb919a52bdbc4759c568b381a391b7755eba9b54",
      "parents": [
        "29dce5ea8ff8b22a7f1f97a9407d2dc11a1b06d2"
      ],
      "author": {
        "name": "Domino Valdano",
        "email": "dvaldano@vmware.com",
        "time": "Fri Aug 07 15:44:10 2020 -0700"
      },
      "committer": {
        "name": "Domino Valdano",
        "email": "dvaldano@vmware.com",
        "time": "Thu Oct 07 13:24:18 2021 -0700"
      },
      "message": "DBSCAN: Fast parallel-optimized DBSCAN\n\nThis optimized version of DBSCAN offers a dramatic improvment over\nthe original brute force implementation.  It should have roughly\nroughly O(N/S log N) runtimes, where N is the size of the input\ndataset (number of points/rows) and S is the number of segments used\n(note:  the algorithm now decides on its own how many segments to use,\nas sometimes splitting things up further will only increase the runtime.\nTherefore S is not necessarily the total number of segments in the\ncluster, it may be less depending on the structure of the dataset.)\n\nThis borrows many aspects from the previous attempt (using\nan overlapping spatial binary tree to segment the dataset and\nusing an R tree index to speed up range queries), but differs in\nits approach in other ways.\n\nThe brute force DBSCAN runs on N^2 time. To improve this,\nwe split the data into different overlapping regions, running\nDBSCAN on each in parallel, then merging the results together\non the coordinator. More specifically:\n\n1.  The data is segmented into connected spatial regions, using an\n    in-house designed binary tree optimized specifically for DBSCAN.\n    This custom DBSCAN-optimized tree is similar to a kd-tree, but tries to simultaneously\n    keep the child nodes of each node as balanced as possible while splitting along a\n    hyperplane which favors passing through the least dense regions of the\n    space. To accomplish this, it constructs a course-grained density map\n    of each node before deciding where to split, minimizing a loss function which\n    tries to estimate the longest expected runtime of any segment assigned to\n    the descendants of that node.\n\n2.  Each leaf of the optimized spatial tree runs the dbscan\n    algorithm on the points in its spatial region, including\n    some points from other regions near its boundaries, using\n    an R tree index for efficient range queries.\n\n3.  Merge the clusters found in each leaf with each other by keeping track\n    of which points in neighboring leaves are within eps of a cluster\u0027s home\n    leaf. Uses madilb\u0027s wcc graph module (weakly connected components) to identify\n    the equivalence classes of clusters across all leaves.\n\n\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d Partial List of Detailed Improvements \u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\n\n- Initial build_kdtree() re-write\n\n    Previously, we were including a lot of points that were nowhere\n    near the border as border points.  The new version should only\n    include those which are actually within epsilon distance of a border.\n\n    It also outputs the information in a different format that\n    provides a bit more information, and will make a fast merge of\n    clusters from different segments possible in a later stage.\n\n    The output of build_kdtree (renamed to build_optimized_tree) is now\n    a single table of annotated points instead of many tables.  The __leaf_id__\n    column has been split into two similar concepts, __leaf_id__ and\n    __dist_id__.  __leaf_id__ tells you which leaf a point is in, and\n    __dist_id__ tells you which (extended) leaf a row is in--which\n    determines the segment on which it will be stored.\n\n    Note that each point can have several entries in the segmented_output\n    table.  There is not a 1-to-1 correspondence between points and rows,\n    since we store each border point in multiple overlapping leaves\n    (segments) at once.\n\n- Convert find_core_points() UDA to dbscan_leaf() UDF\n\n- Add dbscan_record class, for managing input and output of dbscan_leaf\n   ( maps to dbscan_record SQL TYPE, but with some hidden fields added for\n   keeping track of record-specific variables in python; these are\n   automatically removed before yielding the final result, when the\n   record gets cloned via the copy constructor )\n\n- Allow passing id_column as an expression:\n       id_column input param is now handled the same as we do the\n    point column, so that any expression evaluating to an integer type\n    should work instead of requiring the id to be just a single column.\n    The name of the column in the output table will now always be \u0027id\u0027,\n    but the actual expression used on the original input table is still\n    stored in the summary table.\n\n- Use sqrt(eps) only for kd_tree, raw eps for dbscan_leaf\n    The second one compares a squared distance to a squared distance\n    metric, while for generating the kd tree we need an actual distance\n\n- Further re-write of build_kd_tree():\n\n      It was taking 30s to generate the overlapping kd-tree for 10,000 points,\n    where we were doing many long queries, iteratively in python.  And\n    several minutes for 100,000 points.  But this should be the fast part!\n\n      Now, it will generate the non-overlapping kd-tree first, then add\n    the external points for the overlap regions.  The generation of\n    the regular kd-tree, along with cutoff table, is working now and\n    very fast (\u003c 1s for 10,000 points, \u003c 5s for 100,000 points).\n    It\u0027s just a single 0.5s recursive query for generating the segmented\n    source (without external point augmentation), and an even simpler 0.1s\n    query afterwards for the cutoffs.\n       I don\u0027t expect adding the overlap points will take too much time\n    either, as we already have the non-overlapping cutoffs.  We can\n    generate a leaf bounds table from that (min \u0026 max boundary points\n    for each leaf) without even touching the source table, and should\n    be able to join that to the source table in a final query which\n    scans through N/S * max_depth rows per segment (similar to the\n    first two fast queries).\n\n-  Adds print_timings debug statements, for detailed performance profiling\n\n-  Changes __dist_id__ to dist_id and add new __dist_id__;\n   This adds another layer of abstraction to the mapping from\n   leaves to dist_id\u0027s to segments, but was needed to ensure that\n   the leaves are balanced among the physical cluster segments...\n   otherwise, for a small number of leaves the dist_ids map somewhat\n   randomly to the segments, and several can end up on the same segment\n   while other segments handle no data at all.  This will always\n   place leaves on the segments in a round-robin fashion rather than\n   based on gpdb\u0027s internal DISTRIBUTED BY hash function\n\n-  Move dbscan_optmized into its own class\n\n-  Fix use of wrong eps in build_kd_tree (there were cases where we\n   were using eps instead of eps^2 for the squared-dist-norm).\n\n-  Add functions for optimized tree segmentation (uses loss function\n   instead of kdtree for making cuts)\n\n-  Skip missing dist_id\u0027s while creating dist_map:\n\n   If all the dist_id\u0027s are sequential with no gaps, as with the kd-tree\n   then this wouldn\u0027t be necessary.  But now, since some branches of\n   the tree stop splitting before others, we need to account for these\n   missing dist_id\u0027s... otherwise you can end up with several dist_id\u0027s\n   on the same segment and other segments with nothing.\n\n- Accelerates range_query() using vectorized numpy operations\n\n- Move many functions into a self-contained DBSCANStorage class\n\n  This makes things much easier to manage, less params to pass around\n\n- Early removal of points from rtree, ony storing minimum necessary\n\n    Delete each point from rtree as soon as the first range query is run\n    on it, to speed up range queries run on neighboring points.  The smaller\n    the tree, the faster the results get returned.  (Searching the tree\n    for a single point is log N, but a range query returns many points... so\n    it\u0027s at *least* O(log N + k) and possibly more like O(k log N), where k\n    is the number of candidate neighbors returned, which we have to go\n    through one at a time to check if they are actually in range)\n\n    Deleting points as soon as we label them without any further\n    modifications would throw off the calculation of is_core_point.\n    We can still delete them early, but to ensure is_core_point is\n    right we need to keep track of the number of neighbors each internal\n    point has.  Some are internal and others are internal.  The sum of\n    both determines the final value of is_core_point.\n\n    After doing this, a lot of things we were previously doing no longer\n    made sense.  Since we need to query external neighbors of the\n    internal points anyway, we may as well do all of the inverse-queries\n    up front instead (searching for internal neighbors of each external\n    point).  In general, external points are expected to be fewer than\n    internal points so hopefully this will be faster.  But then we have\n    no more use for the cluster-specific trees (unless we want to add\n    them back for use by the internal points?)  range_query no longer\n    has min_results or max_results params\n\n- All examples passing, one each for 3D, 4D, 9D, 16D, 25D, 36D, 49D,\n    and 64D, tested both on 3 segment cluster and 16 segment cluster\n\n- Change default depth to num_segs instead of log_2(num_segs)\n\n    log_2(num_segs) made sense for the kd_tree, because each node of the tree\n    is always split exactly in half, resulting in a balanced binary tree.\n    With the optimized tree algorithm, splits can happen anywhere, there can\n    be 1 segment on the left and 50 on the right, and then later down the\n    tree the 50 get split up more while the 1 does not.  In the worst case,\n    one node gets split off from the rest at each cut, meaning in order to\n    populate num_segs leaves you need num_segs cuts.  This could result in\n    a longer optimzation time, but the optimization phase is usually very\n    fast compared to the actual parallel-DBSCAN phase that follows.\n    It should only matter for small datasets, and the user always has the\n    option of setting a lower max_depth than the default if optimization\n    is taking too much time.\n\n- Cast input to DOUBLE PRECISION[] in source_view\n\n    (This is necessary to be able to handle input tables where the points\n     column is REAL[], INTEGER[], etc.\n\n- Rename depth -\u003e max_segmentation_depth\n- Rename \"kd_tree\" method option to \"optimized\"\n- Remove unused kd_tree code\n\n- Fix a subtle issue involving early removal of border points from rtree:\n\n    If range_query is called on a point in the outer for loop (where we\u0027re\n    looking for a new cluster seed, not growing a cluster yet), then it will be\n    tentatively labelled as NOISE_POINT... but may be assigned to a cluster\n    later, if it borders a core point while we\u0027re growing a cluster.\n    Instead of keeping these points in the rtree, we go ahead and delete them\n    (same as for core points or noise points, which we know won\u0027t need\n    further processing) but must keep track of a list of _possible_border_points\n    in each internal db_rec.  During update_neighbors (called by range_query),\n    we can also add the id to each of its unlabelled neighbors\u0027\n    _possible_border_points list, in case they turn out to be core points later.\n    (None of them will be labelled as core points yet, since anything marked as\n    a core point has already been removed from the tree; but there may be some\n    NOISE_POINT\u0027s; we skip the neighboring NOISE_POINT\u0027s since we know they\n    won\u0027t ever be core points.)\n\n- Add scripts for creating blobs for perf testing\n- Add scripts for generating and running dbscan perf tests\n"
    },
    {
      "commit": "29dce5ea8ff8b22a7f1f97a9407d2dc11a1b06d2",
      "tree": "cabf12a6d072f60d318d4aa583b013c493e41c13",
      "parents": [
        "c0ae940bbebba4d934a8f48dbb4f84d285465841"
      ],
      "author": {
        "name": "Domino Valdano",
        "email": "domino@apache.org",
        "time": "Thu Oct 07 10:26:44 2021 -0700"
      },
      "committer": {
        "name": "Domino Valdano",
        "email": "dvaldano@vmware.com",
        "time": "Thu Oct 07 13:24:18 2021 -0700"
      },
      "message": "DEBUG: Adds WithTracebackForwarding() macro and report_segment_tracebacks param\n\nThis adds a new macro WithTracebackForwarding() to SQLCommon.m4_in,\nwhich can be used to cause a UDF intended to run on the segments to\nforward any traceback information attached to an exception back to\nthe coordinator.\n\nIn order to have the coordinator intercept the forwarded traceback\nmessage and attach it to the DETAILS of the exception thrown on\ncoordinator, there is also a new optinoal flag for DEBUG.plpy_execute(),\nwhich must be set to True:\n\nDEBUG.plpy_execute(..., report_segment_tracebacks\u003dTrue)\n\nWithTracebackForwarding() should wrap any python statement which might\nraise an exception on a segment.  It will enclose it in an appropriate\ntry/except block and handle the exception.\n\nFor example, see definition of plpython UDF __dbscan_leaf() which\nis called from DEBUG.plpy_execute() on coordinator and gets run on the\nsegments:\n\nCREATE OR REPLACE FUNCTION MADLIB_SCHEMA._dbscan_leaf(\n\t...\n) RETURNS SETOF MADLIB_SCHEMA.dbscan_record AS\n$$\n...\n    PythonFunctionBodyOnlyNoSchema(dbscan,dbscan)\n    WithTracebackForwarding(return dbscan.dbscan_leaf(*args))\n...\n$$ LANGUAGE plpythonu VOLATILE\n"
    },
    {
      "commit": "c0ae940bbebba4d934a8f48dbb4f84d285465841",
      "tree": "f5c6e4ea6e182887aee3fa380ea63bcca42f2e03",
      "parents": [
        "e1337aec283e3bcfb9de0a6f950f780664fa9940"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Thu Jul 02 14:33:38 2020 -0400"
      },
      "committer": {
        "name": "Domino Valdano",
        "email": "dvaldano@vmware.com",
        "time": "Thu Oct 07 13:24:18 2021 -0700"
      },
      "message": "DBSCAN: Add indexing optimizations to improve the runtime\n\nJIRA: MADLIB-1017\n\nThe brute force DBSCAN runs on N^2 time. To improve this,\nwe added two layers of indexing.\n1. The data is broken into chunks with kd-tree\n2. Each leaf of the kd-tree creates an rtree index for\nefficient range queries.\n\nIn addition we added a separate process to reduce the number\nof edges to consider during wcc operation.\n"
    },
    {
      "commit": "e1337aec283e3bcfb9de0a6f950f780664fa9940",
      "tree": "faf7bbeb054f2ee4db17f66a8983de996c1e18e1",
      "parents": [
        "2e34c0f45a6e0f3be224ef58a6f4a576eb8eb89a"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Wed Jul 14 14:15:02 2021 -0400"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@vmware.com",
        "time": "Wed Jul 28 13:15:06 2021 -0400"
      },
      "message": "Build: Fix module pathname for gppkg\n\nThe MODULE_PATHNAME variable is set to an absolute path\nduring preprocessing. This creates an issue with the gpupgrade\nsince the database path changes and the old path becomes invalid.\n\nThis commit creates a link in the $libdir directory of the\ngreenplum installation and uses that link to create the MADlib\nUDFs.\n\nIt fixes an issue with the upgrade scripts as well. The drop\ncommands are generated using the arguments but including\nDEFAULT values breaks the drop function command. The commit\nfilters them out.\n"
    },
    {
      "commit": "2e34c0f45a6e0f3be224ef58a6f4a576eb8eb89a",
      "tree": "2c9acb9b0050b5f3c3c0ab566103a2a71ecb6dd7",
      "parents": [
        "64ed9bb3af60fd75f5a21c1f08770f6ca08d2103"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Thu May 06 14:44:00 2021 +0300"
      },
      "committer": {
        "name": "Ekta Khanna",
        "email": "ekhanna@pivotal.io",
        "time": "Mon May 10 18:23:57 2021 -0700"
      },
      "message": "PMML: Rename builder.py to circumvent ignore scripts\n\nPGXN build scripts filter out any potential build folders\nusing a filter with */build* clause. This causes builder.py\nfile to be ignored. Such a generic name is not needed for a\nspecific module so this commit renames the file.\n"
    },
    {
      "commit": "64ed9bb3af60fd75f5a21c1f08770f6ca08d2103",
      "tree": "c1464b9eba3cfc71815dd01332b7c713bf531239",
      "parents": [
        "e77ec591d4a26d64eccd35326454e367051a768b"
      ],
      "author": {
        "name": "Nikhil Kak",
        "email": "nkak@vmware.com",
        "time": "Fri Apr 30 17:38:38 2021 -0700"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@pivotal.io",
        "time": "Fri May 07 19:46:04 2021 +0300"
      },
      "message": "MLP: Set lambda value for minibatch\n\nPreviously in our mlp minibatch code, we were not setting the lambda value\ncorrectly. This meant that in the function `getLossAndUpdateModel`, we would\nalways use the default value of 0 even if the user passed in a non zero value.\n\nThis commit fixes it by setting the lambda value before calling the\ngetLossAndUpdateModel function\n\nCo-authored-by: Ekta Khanna \u003cekhanna@vmware.com\u003e\n"
    },
    {
      "commit": "e77ec591d4a26d64eccd35326454e367051a768b",
      "tree": "401789579686239bf9e6eb9bd941d9a132a49af5",
      "parents": [
        "77f3fd7ef6fdc0eb54656d73ae0d2c06244062ad"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Mon Mar 29 11:28:47 2021 -0400"
      },
      "committer": {
        "name": "Orhan Kislal",
        "email": "okislal@pivotal.io",
        "time": "Fri May 07 19:46:04 2021 +0300"
      },
      "message": "MLP: Add rmsprop and Adam optimization techniques\n\nJIRA: MADLIB-1434, MADLIB-1435\n\nThis commit adds two new variants on the gradient descent algorithms.\nThey are paried together since they both involve adaptive learning rates.\n\nrmsprop: Keep a moving average of the squared gradient for each weight\nAdam: Keep an exponential moving average of the gradient and the squared gradient\n"
    },
    {
      "commit": "77f3fd7ef6fdc0eb54656d73ae0d2c06244062ad",
      "tree": "7a58a2cf0aa605c0a9865b9db54536967aec6c55",
      "parents": [
        "1ba0ab9ce444d0289b168c209f3060483fa6e72e"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Fri Apr 30 18:30:00 2021 +0300"
      },
      "committer": {
        "name": "Ekta Khanna",
        "email": "ekhanna@pivotal.io",
        "time": "Wed May 05 11:43:56 2021 -0700"
      },
      "message": "DL: Fix object table schema error message\n"
    },
    {
      "commit": "1ba0ab9ce444d0289b168c209f3060483fa6e72e",
      "tree": "92ca5fcf7e7457f45e55c6371008d1a42dca5403",
      "parents": [
        "c2bedea16eec9ba55e13e4a8ed797d9db91d9c6d"
      ],
      "author": {
        "name": "Orhan Kislal",
        "email": "okislal@apache.org",
        "time": "Fri Apr 30 14:54:01 2021 +0300"
      },
      "committer": {
        "name": "Ekta Khanna",
        "email": "ekhanna@pivotal.io",
        "time": "Wed May 05 11:43:56 2021 -0700"
      },
      "message": "DL: Fix metrics error message\n"
    }
  ],
  "next": "c2bedea16eec9ba55e13e4a8ed797d9db91d9c6d"
}