blob: 2189024d633a72cad91ec52c9d8683de8a0a1918 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
LOAD 'age';
SET search_path=ag_catalog;
SELECT create_graph('graph');
-- Should error out
SELECT * FROM cypher('graph', $$ RETURN cosine_distance("[1,2,3]", "[1,2,3]") $$) AS (n agtype);
-- Create the extension in the public schema
CREATE EXTENSION vector SCHEMA public;
-- Should error out
SELECT * FROM cypher('graph', $$ RETURN cosine_distance("[1,2,3]", "[1,2,3]") $$) AS (n agtype);
-- Should work
SET search_path=ag_catalog, public;
SELECT create_graph('graph');
SELECT * FROM cypher('graph', $$ RETURN "[1.22,2.22,3.33]"::vector $$) AS (n vector);
SELECT * FROM cypher('graph', $$ RETURN "[1.22,2.22,3.33]"::vector $$) AS (n halfvec);
SELECT * FROM cypher('graph', $$ RETURN "[1.22,2.22,3.33]"::vector $$) AS (n sparsevec);
SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n vector);
SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n halfvec);
SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n sparsevec);
SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n vector(3));
SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n halfvec(3));
SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n sparsevec(3));
-- Should error out
SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n vector(2));
SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n halfvec(2));
SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector $$) AS (n sparsevec(2));
SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector(3) $$) AS (n vector(4));
SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector(3) $$) AS (n halfvec(4));
SELECT * FROM cypher('graph', $$ RETURN [1.22,2.22,3.33]::vector(3) $$) AS (n sparsevec(4));
--
-- Test functions
--
SELECT * FROM cypher('graph', $$ RETURN l2_distance("[1,2,3]", "[1,2,4]") $$) AS (n agtype);
SELECT * FROM cypher('graph', $$ RETURN inner_product("[1,2,3]", "[1,2,4]") $$) AS (n agtype);
SELECT * FROM cypher('graph', $$ RETURN cosine_distance("[1,2,3]", "[1,2,4]") $$) AS (n agtype);
SELECT * FROM cypher('graph', $$ RETURN l1_distance("[1,2,3]", "[1,2,4]") $$) AS (n agtype);
SELECT * FROM cypher('graph', $$ RETURN vector_dims("[1,2,3]") $$) AS (n agtype);
SELECT * FROM cypher('graph', $$ RETURN vector_norm("[1,2,3]") $$) AS (n agtype);
SELECT * FROM cypher('graph', $$ RETURN l2_normalize("[1,2,3]") $$) AS (n vector);
SELECT * FROM cypher('graph', $$ RETURN l2_normalize("[1,2,3]")::text $$) AS (n agtype);
SELECT * FROM cypher('graph', $$ RETURN subvector("[1,2,3,4,5,6]", 2, 4) $$) AS (n vector);
SELECT * FROM cypher('graph', $$ RETURN subvector("[1,2,3,4,5,6]", 2, 4)::text $$) AS (n agtype);
SELECT * FROM cypher('graph', $$ RETURN binary_quantize("[1,2,4]") $$) AS (n bit(3));
--
-- Test operators
--
SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector + [1,2,4]::vector $$) AS (n vector);
SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector - [1,2,4]::vector $$) AS (n vector);
SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector * [1,2,4]::vector $$) AS (n vector);
SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector || [1,2,4]::vector $$) AS (n vector);
SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector <#> [1,2,4]::vector $$) AS (n agtype);
SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector <=> [1,2,4]::vector $$) AS (n agtype);
SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector <+> [1,2,4]::vector $$) AS (n agtype);
--
-- Due to issues with pattern matching syntax, '-' is not allowed
-- as an operator character, so we have to use the OPERATOR syntax.
--
SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (`<->`) [1,2,4]::vector $$) AS (n agtype);
-- Using OPERATOR () syntax
SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (+) [1,2,4]::vector $$) AS (n vector);
SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (-) [1,2,4]::vector $$) AS (n vector);
SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (*) [1,2,4]::vector $$) AS (n vector);
SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (||) [1,2,4]::vector $$) AS (n vector);
SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (`<->`) [1,2,4]::vector $$) AS (n agtype);
SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (<#>) [1,2,4]::vector $$) AS (n agtype);
SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (<=>) [1,2,4]::vector $$) AS (n agtype);
SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (<+>) [1,2,4]::vector $$) AS (n agtype);
SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.+) [1,2,4]::vector $$) AS (n vector);
SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.-) [1,2,4]::vector $$) AS (n vector);
SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.*) [1,2,4]::vector $$) AS (n vector);
SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.||) [1,2,4]::vector $$) AS (n vector);
SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.`<->`) [1,2,4]::vector $$) AS (n agtype);
SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.<#>) [1,2,4]::vector $$) AS (n agtype);
SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.<=>) [1,2,4]::vector $$) AS (n agtype);
SELECT * FROM cypher('graph', $$ RETURN [1,2,3]::vector OPERATOR (public.<+>) [1,2,4]::vector $$) AS (n agtype);
--
-- An example usage
--
SELECT * FROM cypher('graph', $$
CREATE (:Movie {title: "The Matrix", year: 1999, genre: "Action", plot: "A computer hacker learns about the true nature of reality and joins a rebellion to free humanity from a simulated world controlled by machines.", embedding: [-0.07594558, 0.04081754, 0.29592122, -0.11921061]}),
(:Movie {title: "The Matrix Reloaded", year: 2003, genre: "Action", plot: "The rebels continue their fight against the machines, uncovering deeper truths about the Matrix and the nature of their mission.", embedding: [0.30228977, -0.22839354, 0.35070436, 0.01262819]}),
(:Movie {title: "The Matrix Revolutions", year: 2003, genre: "Action", plot: "The final battle between humans and machines reaches its climax as the fate of both worlds hangs in the balance.", embedding: [ 0.12240622, -0.29752459, 0.22620453, 0.24454723]}),
(:Movie {title: "The Matrix Resurrections", year: 2021, genre: "Action", plot: "Neo returns to a new version of the Matrix and must once again fight to save the people from the control of the machines.", embedding: [ 0.34717246, -0.13820869, 0.29214213, 0.08090488]}),
(:Movie {title: "Inception", year: 2010, genre: "Sci-Fi", plot: "A skilled thief is given a chance at redemption if he can successfully perform an inception: planting an idea into someone’s subconscious.", embedding: [ 0.03923657, 0.39284106, -0.20927092, -0.17770818]}),
(:Movie {title: "Interstellar", year: 2014, genre: "Sci-Fi", plot: "A group of explorers travel through a wormhole in space in an attempt to ensure humanity’s survival.", embedding: [-0.29302418, -0.39615033, -0.23393948, -0.09601383]}),
(:Movie {title: "Avatar", year: 2009, genre: "Sci-Fi", plot: "A paraplegic Marine is sent to the moon Pandora, where he becomes torn between following orders and protecting the world he feels is his home.", embedding: [-0.13663386, 0.00635589, -0.03038832, -0.08252723]}),
(:Movie {title: "Blade Runner", year: 1982, genre: "Sci-Fi", plot: "A blade runner must pursue and terminate four replicants who have stolen a ship in space and returned to Earth.", embedding: [ 0.27215557, -0.1479577, -0.09972772, -0.08234394]}),
(:Movie {title: "Blade Runner 2049", year: 2017, genre: "Sci-Fi", plot: "A new blade runner unearths a long-buried secret that has the potential to plunge what’s left of society into chaos.", embedding: [ 0.21560573, -0.07505179, -0.01331814, 0.13403069]}),
(:Movie {title: "Minority Report", year: 2002, genre: "Sci-Fi", plot: "In a future where a special police unit can arrest murderers before they commit their crimes, a top officer is accused of a future murder.", embedding: [ 0.24008012, 0.44954908, -0.30905488, 0.15195407]}),
(:Movie {title: "Total Recall", year: 1990, genre: "Sci-Fi", plot: "A construction worker discovers that his memories have been implanted and becomes embroiled in a conspiracy on Mars.", embedding: [-0.17471036, 0.14695261, -0.06272433, -0.21795064]}),
(:Movie {title: "Elysium", year: 2013, genre: "Sci-Fi", plot: "In a future where the rich live on a luxurious space station while the rest of humanity lives in squalor, a man fights to bring equality.", embedding: [-0.33280967, 0.07733926, 0.11015328, 0.53382836]}),
(:Movie {title: "Gattaca", year: 1997, genre: "Sci-Fi", plot: "In a future where genetic engineering determines social class, a man defies his fate to achieve his dreams.", embedding: [-0.21629286, 0.31114665, 0.08303899, 0.46199759]}),
(:Movie {title: "The Fifth Element", year: 1997, genre: "Sci-Fi", plot: "In a futuristic world, a cab driver becomes the key to saving humanity from an impending cosmic threat.", embedding: [-0.11528205, -0.0208782, -0.0735215, 0.14327449]}),
(:Movie {title: "The Terminator", year: 1984, genre: "Action", plot: "A cyborg assassin is sent back in time to kill the mother of the future resistance leader.", embedding: [ 0.33666933, 0.18040994, -0.01075103, -0.11117851]}),
(:Movie {title: "Terminator 2: Judgment Day", year: 1991, genre: "Action", plot: "A reprogrammed Terminator is sent to protect the future leader of the human resistance from a more advanced Terminator.", embedding: [ 0.34698868, 0.06439331, 0.06232323, -0.19534876]}),
(:Movie {title: "Jurassic Park", year: 1993, genre: "Adventure", plot: "Scientists clone dinosaurs to create a theme park, but things go awry when the creatures escape.", embedding: [ 0.01794725, -0.11434246, -0.46831815, -0.01049593]}),
(:Movie {title: "The Avengers", year: 2012, genre: "Action", plot: "Superheroes assemble to face a global threat from an alien invasion led by Loki.", embedding: [ 0.00546514, -0.37005171, -0.42612838, 0.07968612]})
$$) AS (result agtype);
SELECT * FROM cypher('graph', $$ MATCH (m:Movie) RETURN m.title, (m.embedding)::vector $$) AS (title agtype, embedding vector);
-- Check the dimension of the embedding
SELECT * FROM cypher('graph', $$ MATCH (m:Movie) RETURN m.title, vector_dims(m.embedding) $$) AS (title agtype, dimension int);
-- Get top 4 most similar movies to The Terminator using cosine distance
SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Terminator"})
RETURN m.title ORDER BY cosine_distance(m.embedding, search.embedding)
ASC LIMIT 4
$$) AS (title agtype);
SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Terminator"})
RETURN m.title ORDER BY m.embedding::vector <=> search.embedding::vector
ASC LIMIT 4
$$) AS (title agtype);
-- Get top 4 most similar movies to The Matrix using cosine distance
SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"})
RETURN m.title ORDER BY cosine_distance(m.embedding, search.embedding)
ASC LIMIT 4
$$) AS (title agtype);
SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"})
RETURN m.title ORDER BY m.embedding::vector <=> search.embedding::vector
ASC LIMIT 4
$$) AS (title agtype);
-- l2 norm of the embedding
SELECT * FROM cypher('graph', $$ MATCH (m:Movie) set m.embedding=l2_normalize(m.embedding)::agtype return m.title, m.embedding $$) AS (title agtype, embedding agtype);
-- Get top 4 most similar movies to The Terminator using l2 distance
SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Terminator"})
RETURN m.title ORDER BY l2_distance(m.embedding, search.embedding) ASC LIMIT 4
$$) AS (title agtype);
SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Terminator"})
RETURN m.title ORDER BY m.embedding::vector OPERATOR (`<->`) search.embedding::vector
ASC LIMIT 4
$$) AS (title agtype);
-- Get top 4 most similar movies to The Matrix using l2 distance
SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"})
RETURN m.title ORDER BY l2_distance(m.embedding, search.embedding) ASC LIMIT 4
$$) AS (title agtype);
SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"})
RETURN m.title ORDER BY m.embedding::vector OPERATOR (`<->`) search.embedding::vector
ASC LIMIT 4
$$) AS (title agtype);
--
-- Test vector index
--
-- This function will be used to check if index scan
-- is used successfully. We cannot simply have EXPLAIN
-- in the upcoming queries because it produces some
-- hardcoded oids in sort node, which may change in
-- future and break the tests.
CREATE OR REPLACE FUNCTION plan_has_index_scan(sql text)
RETURNS boolean
LANGUAGE plpgsql AS
$$
DECLARE
plan_lines text[];
plan_text text;
BEGIN
EXECUTE format('EXPLAIN (FORMAT JSON, COSTS OFF) %s', sql) INTO plan_text;
-- Return true if 'Index Scan' appears anywhere
RETURN position('"Index Scan"' in plan_text) > 0;
END;
$$;
SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"})
RETURN m.title ORDER BY m.embedding::vector(4) <=> search.embedding::vector(4)
ASC LIMIT 4
$$) AS (title agtype);
-- The index expression below matches the expression
-- seen in the EXPLAIN plan of above query
DO $$
DECLARE
graph_oid oid;
BEGIN
SELECT graphid INTO graph_oid
FROM ag_catalog.ag_graph
WHERE name = 'graph';
EXECUTE format($f$
CREATE INDEX movie_vector_idx ON graph._ag_label_vertex
USING hnsw (((
agtype_access_operator(
VARIADIC ARRAY[
_agtype_build_vertex(id, _label_name_from_table_oid(labels), properties),
'"embedding"'::agtype
]
)::text
)::vector(4)) vector_cosine_ops);
$f$, graph_oid);
END;
$$;
-- Disable seqscan just to test the index
SET enable_seqscan = off;
SELECT plan_has_index_scan($f$
SELECT * FROM cypher('graph', $$
MATCH (m:Movie)
RETURN m.title
ORDER BY m.embedding::vector(4) <=> [-0.07594558, 0.04081754, 0.29592122, -0.11921061]::vector(4)
ASC LIMIT 4
$$) AS (title agtype);
$f$);
SELECT * FROM cypher('graph', $$ MATCH (m:Movie)
RETURN m.title
ORDER BY m.embedding::vector(4) <=> [-0.07594558, 0.04081754, 0.29592122, -0.11921061]::vector(4)
ASC LIMIT 4
$$) AS (title agtype);
DROP INDEX graph.movie_vector_idx;
SET enable_seqscan = on;
-- Test a direct implicit cast
CREATE CAST (agtype AS vector)
WITH INOUT AS implicit;
SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"})
RETURN m.title ORDER BY m.embedding <=> search.embedding
ASC LIMIT 4
$$) AS (title agtype);
SELECT * FROM cypher('graph', $$ MATCH (m:Movie), (search:Movie {title: "The Matrix"})
RETURN m.title ORDER BY m.embedding OPERATOR (`<->`) search.embedding
ASC LIMIT 4
$$) AS (title agtype);
DO $$
DECLARE
graph_oid oid;
BEGIN
SELECT graphid INTO graph_oid
FROM ag_catalog.ag_graph
WHERE name = 'graph';
EXECUTE format($f$
CREATE INDEX movie_vector_idx ON graph._ag_label_vertex
USING hnsw ((
agtype_access_operator(
VARIADIC ARRAY[
_agtype_build_vertex(id, _label_name_from_table_oid(labels), properties),
'"embedding"'::agtype
]
)::vector(4)) vector_cosine_ops);
$f$, graph_oid);
END;
$$;
-- Disable seqscan just to test the index
SET enable_seqscan = off;
SELECT plan_has_index_scan($f$
SELECT * FROM cypher('graph', $$
MATCH (m:Movie)
RETURN m.title
ORDER BY m.embedding::vector(4) <=> [-0.07594558, 0.04081754, 0.29592122, -0.11921061]::vector(4)
ASC LIMIT 4
$$) AS (title agtype);
$f$);
SELECT * FROM cypher('graph', $$ MATCH (m:Movie)
RETURN m.title
ORDER BY m.embedding::vector(4) <=> [-0.07594558, 0.04081754, 0.29592122, -0.11921061]::vector(4)
ASC LIMIT 4
$$) AS (title agtype);
SET enable_seqscan = on;
--
-- Clean up
--
DROP FUNCTION plan_has_index_scan(text);
DROP CAST (agtype AS vector);
SELECT drop_graph('graph', true);
DROP EXTENSION vector CASCADE;