diff options
| author | W. Kosior <koszko@koszko.org> | 2026-01-06 12:00:01 +0100 |
|---|---|---|
| committer | W. Kosior <koszko@koszko.org> | 2026-01-06 12:00:01 +0100 |
| commit | dd9c63928fa73a64962c49092be584be963bd929 (patch) | |
| tree | 88f57db22d5f096f758576437b51597f8d79139c | |
| parent | cde3b99d2cde45eba8ba95cd7a5886a5e6b825bf (diff) | |
| download | AGH-db-lectures-magister.tar.gz AGH-db-lectures-magister.zip | |
| -rw-r--r-- | 15-procedural-sql/procedural-sql.ipynb | 2313 |
1 files changed, 2313 insertions, 0 deletions
diff --git a/15-procedural-sql/procedural-sql.ipynb b/15-procedural-sql/procedural-sql.ipynb new file mode 100644 index 0000000..f2894c1 --- /dev/null +++ b/15-procedural-sql/procedural-sql.ipynb @@ -0,0 +1,2313 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "510b6020", + "metadata": {}, + "outputs": [], + "source": [ + "import agh_db_lectures\n", + "agh_db_lectures.prepare_notebook_for_sql()" + ] + }, + { + "cell_type": "markdown", + "id": "c4459f3f", + "metadata": {}, + "source": [ + "# Procedural SQL" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d60b5e4b", + "metadata": {}, + "outputs": [], + "source": [ + "!psql --port 25432 --quiet -c \"CREATE DATABASE procedures WITH OWNER demo_user\" postgres || true\n", + "%sql postgresql://demo_user:demo_pwd@localhost:25432/procedures" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "efb05b48", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "DROP TABLE IF EXISTS vertices CASCADE;\n", + "CREATE TABLE vertices(\n", + " id INT PRIMARY KEY,\n", + " label VARCHAR(50) UNIQUE\n", + ");\n", + "\n", + "DROP TABLE IF EXISTS edges CASCADE;\n", + "CREATE TABLE edges(\n", + " from_id INT,\n", + " to_id INT,\n", + " label VARCHAR(50) UNIQUE,\n", + " PRIMARY KEY (from_id, to_id)\n", + ");" + ] + }, + { + "cell_type": "markdown", + "id": "f6c43a43", + "metadata": {}, + "source": [ + "## Functions Defined using SQL" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "92c13036", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "DROP FUNCTION IF EXISTS add_vertex;\n", + "CREATE FUNCTION add_vertex(lbl VARCHAR) RETURNS INTEGER AS\n", + " 'INSERT INTO vertices\n", + " VALUES ((SELECT COALESCE(MAX(id), 0) + 1 FROM vertices),\n", + " $1);\n", + " SELECT id FROM vertices WHERE label = $1;'\n", + " LANGUAGE SQL\n", + " RETURNS NULL ON NULL INPUT;" + ] + }, + { + "cell_type": "markdown", + "id": "f63026c2", + "metadata": {}, + "source": [ + "_notes_\n", + "\n", + "This can help avoid the overhead of running the logic in an application which has to communicate with the DBMS." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7aa2d300", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "SELECT add_vertex('v1');" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9fd3a11b", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "SELECT * FROM vertices;" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cc3194cd", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "SELECT add_vertex(NULL);" + ] + }, + { + "cell_type": "markdown", + "id": "bb9d0089", + "metadata": {}, + "source": [ + "### Dollar-Quoting in Postgres" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac684702", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "SELECT $mytag$I am a 'string' constant with double dollar signs ($$).$mytag$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5331bdc0", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "SELECT $$\n", + "I am a multiline 'string' constant.\n", + "$$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "88e008f2", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "-- The same effect, uses `$$'.\n", + "DROP FUNCTION IF EXISTS add_vertex;\n", + "CREATE FUNCTION add_vertex(lbl VARCHAR) RETURNS INTEGER AS $$\n", + " INSERT INTO vertices\n", + " VALUES ((SELECT COALESCE(MAX(id), 0) + 1 FROM vertices),\n", + " lbl);\n", + " SELECT id FROM vertices WHERE label = lbl;\n", + " $$ LANGUAGE SQL\n", + " RETURNS NULL ON NULL INPUT;" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4fbead01", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "SELECT add_vertex('v2');" + ] + }, + { + "cell_type": "markdown", + "id": "2204aa43", + "metadata": {}, + "source": [ + "## Functions and Procedures Defined Using PL/pgSQL" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "37b73de4", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "DROP FUNCTION IF EXISTS add_vertex;\n", + "CREATE FUNCTION add_vertex(lbl VARCHAR) RETURNS INTEGER AS $$\n", + " DECLARE new_id INTEGER;\n", + " BEGIN\n", + " SELECT COALESCE(MAX(id), 0) + 1\n", + " INTO new_id\n", + " FROM vertices;\n", + "\n", + " INSERT INTO vertices\n", + " VALUES (new_id, lbl);\n", + " \n", + " RETURN new_id;\n", + " END\n", + " $$ LANGUAGE plpgsql\n", + " RETURNS NULL ON NULL INPUT;" + ] + }, + { + "cell_type": "markdown", + "id": "8280af14", + "metadata": {}, + "source": [ + "_notes_\n", + "\n", + "Only SQL is standardized, procedural extensions are not. Other DBMSes have their own procedural languages.\n", + "\n", + "Postgres, however, tries to make porting of code from Oracle's procedural language easy.\n", + "\n", + "Keyword `STRICT` can be used in place of `RETURNS NULL ON NULL INPUT`. Effect is the same." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "42a32c16", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "SELECT add_vertex('v3');" + ] + }, + { + "cell_type": "markdown", + "id": "49db4957", + "metadata": {}, + "source": [ + "_notes_\n", + "\n", + "Run twice to see a unique violation error." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8f7717c8", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "SELECT * FROM vertices;" + ] + }, + { + "cell_type": "markdown", + "id": "6633fdd9", + "metadata": {}, + "source": [ + "### Error Trapping" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b8bab70b", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "DROP FUNCTION IF EXISTS add_vertex;\n", + "CREATE FUNCTION add_vertex(lbl VARCHAR) RETURNS INTEGER AS $$\n", + " DECLARE new_id INTEGER;\n", + " BEGIN\n", + " SELECT COALESCE(MAX(id), 0) + 1\n", + " INTO new_id\n", + " FROM vertices;\n", + "\n", + " INSERT INTO vertices\n", + " VALUES (new_id, lbl);\n", + " \n", + " RETURN new_id;\n", + " EXCEPTION\n", + " WHEN unique_violation THEN\n", + " RETURN add_vertex(lbl || '.');\n", + " END\n", + " $$ LANGUAGE plpgsql\n", + " RETURNS NULL ON NULL INPUT;" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a104c6ab", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "SELECT add_vertex('v3');" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6c4e900a", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "SELECT * FROM vertices;" + ] + }, + { + "cell_type": "markdown", + "id": "a5c8e16b", + "metadata": {}, + "source": [ + "Error codes can be looked up in the documentation:\n", + "\n", + "https://postgresql.org/docs/current/errcodes-appendix.html" + ] + }, + { + "cell_type": "markdown", + "id": "7cf8f055", + "metadata": {}, + "source": [ + "### Conditionals" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5864bc80", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "DROP FUNCTION IF EXISTS add_edge;\n", + "CREATE FUNCTION add_edge(lbl_from VARCHAR, lbl_to VARCHAR)\n", + " RETURNS BOOLEAN AS $$\n", + " DECLARE\n", + " already_present BOOLEAN;\n", + " from_id_requested INTEGER;\n", + " to_id_requested INTEGER;\n", + " BEGIN\n", + " SELECT id\n", + " INTO from_id_requested\n", + " FROM vertices\n", + " WHERE label = lbl_from;\n", + "\n", + " SELECT id\n", + " INTO to_id_requested\n", + " FROM vertices\n", + " WHERE label = lbl_to;\n", + "\n", + " SELECT EXISTS(\n", + " SELECT *\n", + " FROM edges\n", + " WHERE (from_id, to_id) =\n", + " (from_id_requested, to_id_requested)\n", + " )\n", + " INTO already_present;\n", + " \n", + " IF already_present THEN\n", + " RETURN FALSE;\n", + " ELSE\n", + " INSERT INTO edges (from_id, to_id)\n", + " VALUES (from_id_requested, to_id_requested);\n", + " END IF;\n", + " \n", + " RETURN TRUE;\n", + " END\n", + " $$ LANGUAGE plpgsql;" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f19e3072", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "SELECT add_edge('v1', 'v2');" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cbbd4322", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "SELECT * FROM edges;" + ] + }, + { + "cell_type": "markdown", + "id": "c0231942", + "metadata": {}, + "source": [ + "### Procedures vs Functions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b8527ee3", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "DROP FUNCTION IF EXISTS add_edge;\n", + "DROP PROCEDURE IF EXISTS add_edge;\n", + "CREATE PROCEDURE add_edge(lbl_from VARCHAR, lbl_to VARCHAR)\n", + " AS $$\n", + " DECLARE\n", + " already_present BOOLEAN;\n", + " from_id_requested INTEGER;\n", + " to_id_requested INTEGER;\n", + " BEGIN\n", + " SELECT id\n", + " INTO from_id_requested\n", + " FROM vertices\n", + " WHERE label = lbl_from;\n", + "\n", + " SELECT id\n", + " INTO to_id_requested\n", + " FROM vertices\n", + " WHERE label = lbl_to;\n", + "\n", + " SELECT EXISTS(\n", + " SELECT *\n", + " FROM edges\n", + " WHERE (from_id, to_id) =\n", + " (from_id_requested, to_id_requested)\n", + " )\n", + " INTO already_present;\n", + " \n", + " IF NOT already_present THEN\n", + " INSERT INTO edges (from_id, to_id)\n", + " VALUES (from_id_requested, to_id_requested);\n", + " END IF;\n", + " END\n", + " $$ LANGUAGE plpgsql;" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e438d460", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "SELECT add_edge('v3', 'v1');" + ] + }, + { + "cell_type": "markdown", + "id": "d90c16cd", + "metadata": {}, + "source": [ + "_notes_\n", + "\n", + "The above gives an error, because procedures cannot be called inside expressions. We need to use the `CALL` command." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "deed50ff", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "CALL add_edge('v3', 'v1');" + ] + }, + { + "cell_type": "markdown", + "id": "e4218120", + "metadata": {}, + "source": [ + "### Default Argument Values and Assignments" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "44cf8ef7", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "DROP PROCEDURE IF EXISTS add_edge(VARCHAR, VARCHAR);\n", + "DROP PROCEDURE IF EXISTS add_edge(VARCHAR, VARCHAR, VARCHAR);\n", + "CREATE PROCEDURE add_edge(lbl_from VARCHAR,\n", + " lbl_to VARCHAR,\n", + " edge_lbl VARCHAR DEFAULT NULL)\n", + " AS $$\n", + " DECLARE\n", + " already_present BOOLEAN;\n", + " from_id_requested INTEGER;\n", + " to_id_requested INTEGER;\n", + " BEGIN\n", + " SELECT id\n", + " INTO from_id_requested\n", + " FROM vertices\n", + " WHERE label = lbl_from;\n", + "\n", + " SELECT id\n", + " INTO to_id_requested\n", + " FROM vertices\n", + " WHERE label = lbl_to;\n", + " \n", + " edge_lbl := COALESCE(edge_lbl, lbl_from || ' -> ' || lbl_to);\n", + "\n", + " SELECT EXISTS(\n", + " SELECT *\n", + " FROM edges\n", + " WHERE (from_id, to_id) =\n", + " (from_id_requested, to_id_requested)\n", + " )\n", + " INTO already_present;\n", + " \n", + " IF NOT already_present THEN\n", + " INSERT INTO edges (from_id, to_id, label)\n", + " VALUES (from_id_requested, to_id_requested, edge_lbl);\n", + " END IF;\n", + " END\n", + "$$ LANGUAGE plpgsql;" + ] + }, + { + "cell_type": "markdown", + "id": "783fe9f2", + "metadata": {}, + "source": [ + "_notes_\n", + "\n", + "Note that there can be multiple PL/pgSQL functions with the same name but different argument counts/types. Where ambigious, we need to specify the argument types in `DROP FUNCTION` command." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1e98a4fe", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "CALL add_edge('v1', 'v3');" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c47bfca8", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "CALL add_edge('v2', 'v3', 'MyEdge');" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "23bed71b", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "%%sql\n", + "SELECT * FROM edges;" + ] + }, + { + "cell_type": "markdown", + "id": "ab2dbe93", + "metadata": {}, + "source": [ + "### Iteration" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "69e40a42", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "CREATE OR REPLACE FUNCTION my_loop() RETURNS INT AS $$\n", + " DECLARE\n", + " number INT := 0;\n", + " sum INT := 0;\n", + " BEGIN\n", + " LOOP\n", + " sum := sum + number;\n", + " number := number + 1;\n", + " IF number > 100 THEN\n", + " EXIT; -- break out of loop\n", + " END IF;\n", + " END LOOP;\n", + "\n", + " RETURN sum;\n", + " END\n", + "$$ LANGUAGE plpgsql" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a5a913e3", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "SELECT my_loop()" + ] + }, + { + "cell_type": "markdown", + "id": "0c15921b", + "metadata": {}, + "source": [ + "_notes_\n", + "\n", + "Instead of the `IF` construct, we can also use the following to break from the loop.\n", + "\n", + "```sql\n", + "EXIT WHEN number > 100;\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "5aa71ece", + "metadata": {}, + "source": [ + "### Cursors" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "196a1267", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "DELETE FROM edges;\n", + "DELETE FROM vertices;\n", + "\n", + "SELECT add_vertex('vA');\n", + "SELECT add_vertex('vB');\n", + "SELECT add_vertex('vC');\n", + "SELECT add_vertex('vD');\n", + "SELECT add_vertex('vE');\n", + "SELECT add_vertex('vF');\n", + "SELECT add_vertex('vG');" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30f08d0b", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "DROP FUNCTION IF EXISTS list_vertices;\n", + "CREATE FUNCTION list_vertices() RETURNS VARCHAR AS $$\n", + " DECLARE\n", + " current_label VARCHAR;\n", + " result VARCHAR := '';\n", + " first BOOLEAN := TRUE;\n", + " cur REFCURSOR;\n", + " BEGIN\n", + " OPEN cur FOR SELECT label FROM vertices;\n", + "\n", + " LOOP\n", + " FETCH FROM cur INTO current_label; \n", + "\n", + " EXIT WHEN current_label IS NULL;\n", + "\n", + " IF first THEN\n", + " result := current_label;\n", + " ELSE\n", + " result := result || ', ' || current_label;\n", + " END IF;\n", + " first := FALSE;\n", + " END LOOP;\n", + "\n", + " CLOSE cur;\n", + "\n", + " RETURN result;\n", + " END\n", + "$$ LANGUAGE plpgsql;" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dce99e7c", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "SELECT list_vertices();" + ] + }, + { + "cell_type": "markdown", + "id": "9c4cdc48", + "metadata": {}, + "source": [ + "_notes_\n", + "\n", + "`FETCH`ing from a cursor also implicitly sets a variable named `FOUND`. We can test the value of `FOUND` instead of checking if we fetched a `NULL` row.\n", + "\n", + "Note the `CLOSE` instruction. It allows the resources related to cursor to be released and allows the cursor variable to be used with `OPEN` again.\n", + "\n", + "In the `OPEN` instruction we **bound** the cursor variable to a query. We can instead create a cursor variable that is already bound at creation time. We need to declare it as `REFCURSOR` rather than `CURSOR`.\n", + "\n", + "If we have a `CURSOR` variable, we can use it with a variant of `OPEN` without `FOR`, as below." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "43676203", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "DROP FUNCTION IF EXISTS list_vertices;\n", + "CREATE FUNCTION list_vertices() RETURNS VARCHAR AS $$\n", + " DECLARE\n", + " current_label VARCHAR;\n", + " result VARCHAR := '';\n", + " first BOOLEAN := TRUE;\n", + " cur CURSOR FOR SELECT label FROM vertices;\n", + " BEGIN\n", + " OPEN cur;\n", + "\n", + " LOOP\n", + " FETCH FROM cur INTO current_label; \n", + "\n", + " EXIT WHEN current_label IS NULL;\n", + "\n", + " IF first THEN\n", + " result := current_label;\n", + " ELSE\n", + " result := result || ', ' || current_label;\n", + " END IF;\n", + " first := FALSE;\n", + " END LOOP;\n", + "\n", + " CLOSE cur;\n", + "\n", + " RETURN result;\n", + " END\n", + "$$ LANGUAGE plpgsql;" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fa4c78a1", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "%%sql\n", + "SELECT list_vertices();" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "020a0d03", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "SELECT 'wxy'\n", + "UNION\n", + "SELECT 'xyz'" + ] + }, + { + "cell_type": "markdown", + "id": "6593383a", + "metadata": {}, + "source": [ + "### \"RECORD\" Type" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d040e18e", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "DROP FUNCTION IF EXISTS record_example;\n", + "CREATE FUNCTION record_example() RETURNS VARCHAR AS $$\n", + " DECLARE\n", + " rec RECORD;\n", + " BEGIN\n", + " SELECT * INTO rec FROM vertices LIMIT 1;\n", + " \n", + " RETURN rec.label || '(' || rec.id || ')';\n", + " END\n", + "$$ LANGUAGE plpgsql;" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "31c3563c", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "SELECT record_example()" + ] + }, + { + "cell_type": "markdown", + "id": "e929d5c5", + "metadata": {}, + "source": [ + "### \"FOR … IN\" Loops" + ] + }, + { + "cell_type": "markdown", + "id": "e07dd597", + "metadata": {}, + "source": [ + "_notes_\n", + "\n", + "We can use a `CURSOR` (but not a `REFCURSOR`) with a special variant of the `LOOP` construct." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0f5ef4f", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "DROP FUNCTION IF EXISTS list_vertices;\n", + "CREATE FUNCTION list_vertices() RETURNS VARCHAR AS $$\n", + " DECLARE\n", + " result VARCHAR := '';\n", + " first BOOLEAN := TRUE;\n", + " cur CURSOR FOR SELECT label FROM vertices;\n", + " BEGIN\n", + " FOR record IN cur LOOP\n", + " IF first THEN\n", + " result := record.label;\n", + " ELSE\n", + " result := result || ', ' || record.label;\n", + " END IF;\n", + " first := FALSE;\n", + " END LOOP;\n", + "\n", + " return result;\n", + " END\n", + "$$ LANGUAGE plpgsql;" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1e0bd1e4", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "SELECT list_vertices();" + ] + }, + { + "cell_type": "markdown", + "id": "a9d8d3f2", + "metadata": {}, + "source": [ + "_notes_\n", + "\n", + "Finally, we can omit the declaration of a cursor variable and put query code in the loop construct." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "147edc94", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "DROP FUNCTION IF EXISTS list_vertices;\n", + "CREATE FUNCTION list_vertices() RETURNS VARCHAR AS $$\n", + " DECLARE\n", + " result VARCHAR := '';\n", + " first BOOLEAN := TRUE;\n", + " record RECORD;\n", + " BEGIN\n", + " FOR record IN SELECT label FROM vertices LOOP\n", + " IF first THEN\n", + " result := record.label;\n", + " ELSE\n", + " result := result || ', ' || record.label;\n", + " END IF;\n", + " first := FALSE;\n", + " END LOOP;\n", + "\n", + " return result;\n", + " END\n", + "$$ LANGUAGE plpgsql;" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "abf479c6", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "SELECT list_vertices();" + ] + }, + { + "cell_type": "markdown", + "id": "f0430572", + "metadata": {}, + "source": [ + "_notes_\n", + "\n", + "Note that we now needed to explicitly declare the cursor variable (which was not required before).\n", + "\n", + "It happens to be also possible to create a list like this with an aggregation function built into Postgres, `STRING_AGG()`.\n", + "\n", + "```sql\n", + "SELECT STRING_AGG(label, ', ') FROM vertices;\n", + "```\n", + "\n", + "Note, however, that a loop in PL/pgSQL allows us to control the order of the values that are processed (we can use `ORDER BY` in the query)." + ] + }, + { + "cell_type": "markdown", + "id": "bb8a18c6", + "metadata": {}, + "source": [ + "### Sets of Values" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "29327330", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "DROP FUNCTION IF EXISTS set_example;\n", + "CREATE FUNCTION set_example() RETURNS SETOF TEXT AS $$\n", + " BEGIN\n", + " RETURN NEXT 'foo';\n", + " RETURN NEXT 'bar';\n", + " RETURN NEXT 'baz';\n", + " END\n", + "$$ LANGUAGE plpgsql;" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "23701f09", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "%%sql\n", + "SELECT set_example()" + ] + }, + { + "cell_type": "markdown", + "id": "231b572f", + "metadata": {}, + "source": [ + "_notes_\n", + "\n", + "Try also adding the following above the `RETURN NEXT` lines.\n", + "\n", + "```sql\n", + " RETURN QUERY SELECT 'wxy'\n", + " UNION\n", + " SELECT 'xyz';\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "b0444eaf", + "metadata": {}, + "source": [ + "### \"FETCH\" and \"MOVE\" Commands" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dea00db4", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "DROP FUNCTION IF EXISTS fetch_examples;\n", + "CREATE FUNCTION fetch_examples() RETURNS SETOF TEXT AS $$\n", + " DECLARE\n", + " cur CURSOR FOR SELECT label, id\n", + " FROM vertices\n", + " ORDER BY label;\n", + " _label TEXT;\n", + " _id INT;\n", + " BEGIN\n", + " OPEN cur;\n", + "\n", + " FETCH LAST FROM cur INTO _label, _id;\n", + " RETURN NEXT _label || ' (' || _id || ')';\n", + "\n", + " CLOSE cur;\n", + " END\n", + "$$ LANGUAGE plpgsql;" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8224dd77", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "SELECT fetch_examples()" + ] + }, + { + "cell_type": "markdown", + "id": "5d7e6049", + "metadata": {}, + "source": [ + "_notes_\n", + "\n", + "Add the following.\n", + "\n", + "```sql\n", + " MOVE FORWARD 1 IN cur;\n", + " FETCH FROM cur INTO _label, _id;\n", + " RETURN NEXT _label || ' (' || _id || ')';\n", + "```\n", + "\n", + "See that `MOVE` allowed us to skip a row.\n", + "\n", + "Add the following.\n", + "\n", + "```sql\n", + " MOVE LAST IN cur;\n", + " FETCH FROM cur INTO _label, _id;\n", + " RETURN NEXT _label || ' (' || _id || ')';\n", + "```\n", + "\n", + "See that the cursor got positioned **after** the final row and subsequent `FETCH` returned a record with `NULL`s.\n", + "\n", + "Add the following.\n", + "\n", + "```sql\n", + " MOVE LAST IN cur;\n", + " MOVE RELATIVE -1 IN cur;\n", + " FETCH FROM cur INTO _label, _id;\n", + " RETURN NEXT _label || ' (' || _id || ')';\n", + "```\n", + "\n", + "See that we were able to seek backward and fetch the last row of the query. For some queries Postgres might be **by default** unable to move the cursor backward. The keyword `SCROLL` can be added after cursor name in its declaration to mandate that backward seeks are possible, as below.\n", + "\n", + "```sql\n", + "cur SCROLL CURSOR FOR SELECT 'something';\n", + "```\n", + "\n", + "Now, replace the most recently added sequence of `MOVE`&`FETCH` lines and instead add the following, functionally equivalent code.\n", + "\n", + "```sql\n", + " MOVE LAST IN cur;\n", + " FETCH RELATIVE 0 FROM cur INTO _label, _id;\n", + "```\n", + "\n", + "Now, replace it with yet shorter, equivalent line.\n", + "\n", + "```sql\n", + " FETCH LAST FROM cur INTO _label, _id;\n", + "```\n", + "\n", + "As you see, the clauses `LAST`, `RELATIVE *count*`, `FORWARD *count*` and others work both with `FETCH` and `MOVE`." + ] + }, + { + "cell_type": "markdown", + "id": "b167cd89", + "metadata": {}, + "source": [ + "#### Using Cursors in Nonprocedural SQL" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3de29f0f", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "DECLARE sql_cur CURSOR WITH HOLD FOR SELECT * FROM vertices" + ] + }, + { + "cell_type": "markdown", + "id": "3d9fb446", + "metadata": {}, + "source": [ + "_notes_\n", + "\n", + "The SQL standard does define cursors as well and they can be used outside user-defined functions. Note that they are meant by the standard to be used by other software, not by humans interacting with the database through, say, a shell.\n", + "\n", + "Cursors are by default closed automatically by the end of transaction. An SQL cursor can be declared `WITH HOLD` to change this behavior and instead have it persist after transaction. We leverage this to play with a cursor from within the Jupyter Notebook, which does not currently handle transactions." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ab34fbab", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "FETCH FORWARD 2 FROM sql_cur" + ] + }, + { + "cell_type": "markdown", + "id": "9a8dce96", + "metadata": {}, + "source": [ + "_notes_\n", + "\n", + "We can now provide a count after `FORWARD` to fetch more than one row at once. This was not possible with a cursor in PL/pgSQL." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "623baa58", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "MOVE RELATIVE 2 IN sql_cur" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fd5d8868", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "FETCH FORWARD 2 FROM sql_cur" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "281e2cde", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "FETCH ALL IN sql_cur" + ] + }, + { + "cell_type": "markdown", + "id": "7d62c69d", + "metadata": {}, + "source": [ + "_notes_\n", + "\n", + "The `FETCH ALL`, as used above, is also not available in PL/pgSQL." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "069a051f", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "CLOSE sql_cur" + ] + }, + { + "cell_type": "markdown", + "id": "6094d7c3", + "metadata": {}, + "source": [ + "_notes_\n", + "\n", + "Only now can a cursor with name `sql_cur` be declared again." + ] + }, + { + "cell_type": "markdown", + "id": "a503ec5b", + "metadata": {}, + "source": [ + "_notes_\n", + "\n", + "The `SCROLL` option works analogously as in PL/pgSQL.\n", + "\n", + "It is also possible to return a cursor (or a set of them!) from a PL/pgSQL function and use it from SQL." + ] + }, + { + "cell_type": "markdown", + "id": "a0bdac01", + "metadata": {}, + "source": [ + "### Simple Graph Algorithm Example" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d6a9e761", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "%%sql\n", + "CALL add_edge('vA', 'vB');\n", + "CALL add_edge('vA', 'vC');\n", + "CALL add_edge('vB', 'vC');\n", + "CALL add_edge('vB', 'vE');\n", + "CALL add_edge('vE', 'vF');\n", + "CALL add_edge('vE', 'vA');\n", + "CALL add_edge('vD', 'vC');\n", + "CALL add_edge('vF', 'vC');" + ] + }, + { + "cell_type": "markdown", + "id": "4d0bd5dd", + "metadata": {}, + "source": [ + "Task: find all vertices reachable from given vertex." + ] + }, + { + "cell_type": "markdown", + "id": "39b40891", + "metadata": {}, + "source": [ + "_notes_\n", + "\n", + "This would be difficult to achieve with nonprocedural SQL. BFS and DFS algorithms both utilize iteration." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "84592ab9", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "DROP FUNCTION IF EXISTS find_reachable;\n", + "CREATE FUNCTION find_reachable(start_label VARCHAR)\n", + " RETURNS SETOF VARCHAR AS $$\n", + " BEGIN\n", + " CREATE TEMPORARY TABLE __current_ids AS\n", + " SELECT id FROM vertices WHERE label = start_label;\n", + "\n", + " CREATE TEMPORARY TABLE __found_ids AS\n", + " SELECT * FROM __current_ids;\n", + "\n", + " LOOP\n", + " CREATE TEMPORARY TABLE __tmp_ids AS\n", + " SELECT DISTINCT to_id\n", + " FROM __current_ids JOIN edges ON id = from_id\n", + " WHERE to_id NOT IN (SELECT * FROM __found_ids);\n", + "\n", + " EXIT WHEN NOT EXISTS (SELECT * FROM __tmp_ids);\n", + "\n", + " DELETE FROM __current_ids;\n", + " INSERT INTO __found_ids SELECT * FROM __tmp_ids;\n", + " INSERT INTO __current_ids SELECT * FROM __tmp_ids;\n", + " DROP TABLE __tmp_ids;\n", + " END LOOP;\n", + "\n", + " RETURN QUERY SELECT label\n", + " FROM __found_ids NATURAL JOIN vertices;\n", + "\n", + " DROP TABLE __found_ids;\n", + " DROP TABLE __current_ids;\n", + " DROP TABLE __tmp_ids;\n", + " END\n", + "$$ LANGUAGE plpgsql;" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "651ca228", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "SELECT find_reachable('vA')" + ] + }, + { + "cell_type": "markdown", + "id": "8f0a39cb", + "metadata": {}, + "source": [ + "_notes_\n", + "\n", + "Note that something similar could also be achieved with nonprocedural SQL using a recursive `WITH` query (not discussed before in this course). An example is presented below.\n", + "\n", + "```sql\n", + "WITH RECURSIVE reachable_ids AS (\n", + " SELECT id FROM vertices WHERE label = 'vE'\n", + " UNION\n", + " SELECT DISTINCT to_id\n", + " FROM reachable_ids JOIN edges ON (id = from_id)\n", + ")\n", + "SELECT label FROM reachable_ids NATURAL JOIN vertices;\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "1921b86c", + "metadata": {}, + "source": [ + "## Support for Other Procedural Languages" + ] + }, + { + "cell_type": "markdown", + "id": "d9588667", + "metadata": {}, + "source": [ + "_notes_\n", + "\n", + "Examples for computing the greatest common denominator." + ] + }, + { + "cell_type": "markdown", + "id": "d7dd04dc", + "metadata": {}, + "source": [ + "Tcl\n", + "\n", + "```sql\n", + "CREATE EXTENSION pltcl;\n", + "\n", + "CREATE FUNCTION gcd(a INT, b INT) RETURNS INT AS $$\n", + "```\n", + "```tcl\n", + " if {$b > $a} {lassign \"$b $a\" b a}\n", + "\n", + " while {$a % $b > 0} {\n", + " lassign \"$b [expr {$a % $b}]\" a b\n", + " }\n", + "\n", + " return $b\n", + "```\n", + "```sql\n", + "$$ LANGUAGE pltcl STRICT;\n", + "\n", + "SELECT gcd(45, 72);\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "2df8b161", + "metadata": {}, + "source": [ + "and Python\n", + "\n", + "```sql\n", + "CREATE EXTENSION plpython3u;\n", + "\n", + "CREATE FUNCTION gcd(a INT, b INT) RETURNS INT AS $$\n", + "```\n", + "```python\n", + " if b > a:\n", + " a, b = b, a\n", + "\n", + " while a % b:\n", + " a, b = b, a%b\n", + "\n", + " return b\n", + "```\n", + "```sql\n", + "$$ LANGUAGE plpython3u STRICT;\n", + "\n", + "SELECT gcd(45, 72);\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "daf605d7", + "metadata": {}, + "source": [ + "## Triggers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f7367ff8", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "DROP TABLE IF EXISTS edge_changes;\n", + "CREATE TABLE edge_changes (\n", + " change_id SERIAL PRIMARY KEY,\n", + " from_id INT,\n", + " to_id INT,\n", + " label VARCHAR,\n", + " \"when\" TIMESTAMP,\n", + " change VARCHAR(7),\n", + " CHECK (change IN ('added', 'removed'))\n", + ");\n", + "\n", + "CREATE OR REPLACE FUNCTION log_edge_change()\n", + " RETURNS TRIGGER AS $$\n", + " BEGIN\n", + " IF old = new THEN\n", + " RETURN NULL;\n", + " END IF;\n", + "\n", + " IF old.from_id IS NOT NULL THEN\n", + " INSERT INTO edge_changes\n", + " (from_id, to_id, label,\n", + " \"when\", change)\n", + " VALUES (old.from_id, old.to_id, old.label,\n", + " NOW(), 'removed');\n", + " END IF;\n", + "\n", + " IF new.from_id IS NOT NULL THEN\n", + " INSERT INTO edge_changes\n", + " (from_id, to_id, label,\n", + " \"when\", change)\n", + " VALUES (new.from_id, new.to_id, new.label,\n", + " NOW(), 'added');\n", + " END IF;\n", + "\n", + " RETURN NULL;\n", + " END\n", + "$$ LANGUAGE plpgsql;\n", + "\n", + "CREATE OR REPLACE TRIGGER log_change\n", + " AFTER INSERT OR DELETE OR UPDATE ON edges\n", + " FOR EACH ROW\n", + " EXECUTE FUNCTION log_edge_change();" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "068144f2", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "SELECT * FROM edge_changes;" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5effd28", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "-- A no-op, should not generate a log entry.\n", + "UPDATE edges\n", + "SET to_id = to_id\n", + "WHERE to_id = 2;\n", + "\n", + "-- Deletion, should generate log entries.\n", + "DELETE FROM edges;" + ] + }, + { + "cell_type": "markdown", + "id": "df25ef7a", + "metadata": {}, + "source": [ + "_notes_\n", + "\n", + "Re-run the earlier `SELECT` to see the logged changes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b6838443", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "CALL add_edge('vA', 'vB');\n", + "CALL add_edge('vA', 'vC');\n", + "CALL add_edge('vB', 'vC');\n", + "CALL add_edge('vB', 'vE');\n", + "CALL add_edge('vE', 'vF');\n", + "CALL add_edge('vE', 'vA');\n", + "CALL add_edge('vD', 'vC');\n", + "CALL add_edge('vF', 'vC');" + ] + }, + { + "cell_type": "markdown", + "id": "77e8d81d", + "metadata": {}, + "source": [ + "_notes_\n", + "\n", + "Re-run the earlier `SELECT` to see the logged changes." + ] + }, + { + "cell_type": "markdown", + "id": "0a6f4267", + "metadata": {}, + "source": [ + "### \"INSTEAD OF\" Triggers and Trigger-Updatable Views" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3b63cb74", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "DROP VIEW IF EXISTS is_edge;\n", + "CREATE VIEW is_edge AS\n", + "SELECT l.id AS from_id,\n", + " l.label AS from_label,\n", + " r.id AS to_id,\n", + " r.label AS to_label,\n", + " e.to_id IS NOT NULL AS present\n", + "FROM vertices l\n", + " CROSS JOIN vertices r\n", + " LEFT JOIN edges e ON (l.id, r.id) = (e.from_id, e.to_id)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "14ccb9d4", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "SELECT * FROM is_edge" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0115d614", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "UPDATE is_edge\n", + "SET present = TRUE\n", + "WHERE (from_label, to_label) = ('vF', 'vG')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "73eacf7f", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "CREATE OR REPLACE FUNCTION update_is_edge()\n", + " RETURNS TRIGGER AS $$\n", + " BEGIN\n", + " IF new.present THEN\n", + " CALL add_edge(old.from_label, old.to_label);\n", + " ELSE\n", + " DELETE FROM edges\n", + " WHERE (from_id, to_id) = (old.from_id, old.to_id);\n", + " END IF;\n", + "\n", + " RETURN NULL;\n", + " END\n", + "$$ LANGUAGE plpgsql;\n", + "\n", + "CREATE OR REPLACE TRIGGER handle_update\n", + " INSTEAD OF UPDATE ON is_edge\n", + " FOR EACH ROW\n", + " EXECUTE FUNCTION update_is_edge();" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f0b89edc", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "UPDATE is_edge\n", + "SET present = TRUE\n", + "WHERE (from_label, to_label) = ('vF', 'vG')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4ffd1b04", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "SELECT * FROM edge_changes" + ] + }, + { + "cell_type": "markdown", + "id": "f9048743", + "metadata": {}, + "source": [ + "_notes_\n", + "\n", + "We see the log of changes made by our `INSTEAD OF` trigger." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "242432a2", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "SELECT * FROM edges" + ] + }, + { + "cell_type": "markdown", + "id": "890dedaf", + "metadata": {}, + "source": [ + "_notes_\n", + "\n", + "We see the row added through view update." + ] + }, + { + "cell_type": "markdown", + "id": "316d6132", + "metadata": {}, + "source": [ + "#### Examples with Sanity Checks" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6bab5c17", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "CREATE OR REPLACE FUNCTION update_is_edge()\n", + " RETURNS TRIGGER AS $$\n", + " BEGIN\n", + " IF (old.from_id, old.from_label, old.to_id, old.to_label)\n", + " <>\n", + " (new.from_id, new.from_label, new.to_id, new.to_label)\n", + " THEN\n", + " RAISE EXCEPTION\n", + " 'Illegal update (from_id = %, to_id = %)',\n", + " old.from_id, old.to_id\n", + " USING HINT = 'Please only set the `present'' column';\n", + " END IF;\n", + "\n", + " IF new.present THEN\n", + " CALL add_edge(old.from_label, old.to_label);\n", + " ELSE\n", + " DELETE FROM edges\n", + " WHERE (from_id, to_id) = (old.from_id, old.to_id);\n", + " END IF;\n", + "\n", + " RETURN NULL;\n", + " END\n", + "$$ LANGUAGE plpgsql;\n", + "\n", + "CREATE OR REPLACE TRIGGER handle_update\n", + " INSTEAD OF UPDATE ON is_edge\n", + " FOR EACH ROW\n", + " EXECUTE FUNCTION update_is_edge();" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "58973eac", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "UPDATE is_edge\n", + "SET to_label = 'vE'\n", + "WHERE (from_label, to_label) = ('vF', 'vG')" + ] + }, + { + "cell_type": "markdown", + "id": "30aa6925", + "metadata": {}, + "source": [ + "_notes_\n", + "\n", + "Above we see the effect of our `RAISE` instruction in trigger's code." + ] + }, + { + "cell_type": "markdown", + "id": "6eccd424", + "metadata": {}, + "source": [ + "If we do not need a custom exception message, we can use the `ASSERT` command." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "db872df4", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "CREATE OR REPLACE FUNCTION update_is_edge()\n", + " RETURNS TRIGGER AS $$\n", + " BEGIN\n", + " ASSERT COALESCE(\n", + " (old.from_id, old.from_label, old.to_id, old.to_label)\n", + " =\n", + " (new.from_id, new.from_label, new.to_id, new.to_label),\n", + " TRUE\n", + " );\n", + "\n", + " IF new.present THEN\n", + " CALL add_edge(old.from_label, old.to_label);\n", + " ELSE\n", + " DELETE FROM edges\n", + " WHERE (from_id, to_id) = (old.from_id, old.to_id);\n", + " END IF;\n", + "\n", + " RETURN NULL;\n", + " END\n", + "$$ LANGUAGE plpgsql;\n", + "\n", + "CREATE OR REPLACE TRIGGER handle_update\n", + " INSTEAD OF UPDATE ON is_edge\n", + " FOR EACH ROW\n", + " EXECUTE FUNCTION update_is_edge();" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "998ce4af", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "UPDATE is_edge\n", + "SET to_label = 'vE'\n", + "WHERE (from_label, to_label) = ('vF', 'vG')" + ] + }, + { + "cell_type": "markdown", + "id": "645362b3", + "metadata": {}, + "source": [ + "_notes_\n", + "\n", + "The above exception was generated by the `ASSERT` command." + ] + }, + { + "cell_type": "markdown", + "id": "365fcfc8", + "metadata": {}, + "source": [ + "See that valid updates still do work." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3345ae83", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "UPDATE is_edge\n", + "SET present = FALSE\n", + "WHERE (from_label, to_label) = ('vF', 'vG')" + ] + }, + { + "cell_type": "markdown", + "id": "c921b3a6", + "metadata": {}, + "source": [ + "### Trigger-Implemented Integrity Constraints" + ] + }, + { + "cell_type": "markdown", + "id": "7ff12f0e", + "metadata": {}, + "source": [ + "The cycle detection algorithm below has been derived from Kahn's.\n", + "\n", + "https://en.wikipedia.org/wiki/Topological_sorting#Kahn's_algorithm" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "17cf214b", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "%%sql\n", + "-- Make sure we have no edges initially.\n", + "DELETE FROM edges;\n", + "\n", + "CREATE OR REPLACE FUNCTION verify_acyclic()\n", + " RETURNS TRIGGER AS $$\n", + " BEGIN\n", + " RAISE WARNING 'Checking for cycles';\n", + " -- Prepare a working copy of edges that we'll modify.\n", + " DROP TABLE IF EXISTS __edges_left;\n", + " CREATE TEMPORARY TABLE __edges_left AS\n", + " SELECT from_id, to_id FROM edges;\n", + "\n", + " LOOP\n", + " EXIT WHEN NOT EXISTS (SELECT * FROM __edges_left);\n", + "\n", + " -- Find vertices that have no incoming edges.\n", + " DROP TABLE IF EXISTS __source_vertices;\n", + " CREATE TEMPORARY TABLE __source_vertices\n", + " AS SELECT _out.from_id\n", + " FROM __edges_left _in\n", + " RIGHT JOIN __edges_left _out\n", + " ON (_in.to_id = _out.from_id)\n", + " WHERE _in.to_id IS NULL;\n", + "\n", + " IF NOT EXISTS (SELECT * FROM __source_vertices) THEN\n", + " -- There is no vertex without incoming edge.\n", + " RAISE EXCEPTION 'Cycle detected';\n", + " END IF;\n", + "\n", + " DELETE FROM __edges_left\n", + " WHERE from_id IN (SELECT * FROM __source_vertices);\n", + " END LOOP;\n", + "\n", + " DROP TABLE __edges_left;\n", + " DROP TABLE IF EXISTS source_vertices;\n", + "\n", + " RETURN NULL;\n", + " END\n", + "$$ LANGUAGE plpgsql;\n", + "\n", + "CREATE OR REPLACE TRIGGER forbid_cycles\n", + " AFTER INSERT OR UPDATE on edges\n", + " FOR EACH ROW\n", + " EXECUTE FUNCTION verify_acyclic();" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0b1405a5", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "CALL add_edge('vA', 'vB');\n", + "CALL add_edge('vA', 'vC');\n", + "CALL add_edge('vB', 'vC');\n", + "CALL add_edge('vB', 'vE');\n", + "CALL add_edge('vE', 'vF');" + ] + }, + { + "cell_type": "markdown", + "id": "fa81c7b7", + "metadata": {}, + "source": [ + "_notes_\n", + "\n", + "The above succeeds, there are no edges yet." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "aebee679", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "-- Would create cycle vA → vB → vE → vA.\n", + "CALL add_edge('vE', 'vA');" + ] + }, + { + "cell_type": "markdown", + "id": "35546575", + "metadata": {}, + "source": [ + "_notes_\n", + "\n", + "Our trigger has properly prevented the insertion." + ] + }, + { + "cell_type": "markdown", + "id": "8f5199ed", + "metadata": {}, + "source": [ + "#### \"WHEN\" clause" + ] + }, + { + "cell_type": "markdown", + "id": "73130fb1", + "metadata": {}, + "source": [ + "If a trigger is only meant to be run in certain cases, we can utilize the `WHEN` clause of `CREATE TRIGGER`.\n", + "\n", + "Let me first introduce you to the `IS DISTINCT FROM` operator." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6ed6c225", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "-- Two NULL values are treated as non-distinct.\n", + "SELECT (5, 6, NULL) IS DISTINCT FROM (5, 6, NULL)" + ] + }, + { + "cell_type": "markdown", + "id": "4ca99236", + "metadata": {}, + "source": [ + "_notes_\n", + "\n", + "`IS DISTINCT FROM` can be used to compare values or **tuples** (as in this case)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "574677cb", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "SELECT (5, 6, NULL) IS DISTINCT FROM (5, 789, NULL)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10819fa1", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "-- We'll log (non-raising) trigger firings.\n", + "DROP TABLE IF EXISTS cycle_checks;\n", + "CREATE TABLE cycle_checks (\n", + " id SERIAL PRIMARY KEY,\n", + " _when TIMESTAMP\n", + ");\n", + "\n", + "CREATE OR REPLACE FUNCTION verify_acyclic()\n", + " RETURNS TRIGGER AS $$\n", + " BEGIN\n", + " -- We have just added this `INSERT' command.\n", + " INSERT INTO cycle_checks (_when)\n", + " VALUES (NOW());\n", + "\n", + " -- Prepare a working copy of edges that we'll modify.\n", + " DROP TABLE IF EXISTS __edges_left;\n", + " CREATE TEMPORARY TABLE __edges_left AS\n", + " SELECT from_id, to_id FROM edges;\n", + "\n", + " LOOP\n", + " EXIT WHEN NOT EXISTS (SELECT * FROM __edges_left);\n", + "\n", + " -- Find vertices that have no incoming edges.\n", + " DROP TABLE IF EXISTS __source_vertices;\n", + " CREATE TEMPORARY TABLE __source_vertices\n", + " AS SELECT _out.from_id\n", + " FROM __edges_left _in\n", + " RIGHT JOIN __edges_left _out\n", + " ON (_in.to_id = _out.from_id)\n", + " WHERE _in.to_id IS NULL;\n", + "\n", + " IF NOT EXISTS (SELECT * FROM __source_vertices) THEN\n", + " -- There is no vertex without incoming edge.\n", + " RAISE EXCEPTION 'Cycle detected';\n", + " END IF;\n", + "\n", + " DELETE FROM __edges_left\n", + " WHERE from_id IN (SELECT * FROM __source_vertices);\n", + " END LOOP;\n", + "\n", + " DROP TABLE __edges_left;\n", + " DROP TABLE IF EXISTS source_vertices;\n", + "\n", + " RETURN NULL;\n", + " END\n", + "$$ LANGUAGE plpgsql;\n", + "\n", + "DROP TRIGGER IF EXISTS forbid_cycles ON edges;\n", + "\n", + "CREATE OR REPLACE TRIGGER forbid_cycles_on_insert\n", + " AFTER INSERT ON edges\n", + " FOR EACH ROW\n", + " EXECUTE FUNCTION verify_acyclic();\n", + "\n", + "CREATE OR REPLACE TRIGGER forbid_cycles_on_update\n", + " AFTER UPDATE ON edges\n", + " FOR EACH ROW\n", + " WHEN ((old.from_id, old.to_id)\n", + " IS DISTINCT FROM\n", + " (new.from_id, new.to_id))\n", + " EXECUTE FUNCTION verify_acyclic();" + ] + }, + { + "cell_type": "markdown", + "id": "d5243022", + "metadata": {}, + "source": [ + "_notes_\n", + "\n", + "As shown above, we can use `WHEN` in `CREATE TRIGGER` to more concisely specify a condition for trigger firing." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dcb15881", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "DELETE FROM edges WHERE label = 'vA -> vB';\n", + "CALL add_edge('vA', 'vB');\n", + "SELECT * FROM cycle_checks;" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "aad65684", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "SELECT * FROM edges" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a2551564", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "UPDATE edges\n", + "SET to_id = 1,\n", + " label = 'vB -> vA'\n", + "WHERE (from_id, to_id) = (2, 3)" + ] + }, + { + "cell_type": "markdown", + "id": "6cce7c97", + "metadata": {}, + "source": [ + "_notes_\n", + "\n", + "Cycles are still properly prevented." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1aaebada", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "UPDATE edges\n", + "SET label = 'vB ---> vA'\n", + "WHERE (from_id, to_id) = (2, 3);\n", + "SELECT * FROM cycle_checks;" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b7e9d140", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "UPDATE edges\n", + "SET label = 'vB -> vA'\n", + "WHERE (from_id, to_id) = (2, 3);\n", + "SELECT * FROM cycle_checks;" + ] + }, + { + "cell_type": "markdown", + "id": "37ad6c20", + "metadata": {}, + "source": [ + "_notes_\n", + "\n", + "Irrelevant updates (label changes) give false in the `WHEN` condition and do not cause trigger firing." + ] + }, + { + "cell_type": "markdown", + "id": "d321bebc", + "metadata": {}, + "source": [ + "### \"FOR EACH STATEMENT\" Triggers" + ] + }, + { + "cell_type": "markdown", + "id": "b95f36cb", + "metadata": {}, + "source": [ + "Our `FOREACH ROW` trigger fires once for every inserted row, even if they are inserted in one operation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e86cd7fb", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "DELETE FROM edges WHERE from_id = 1;\n", + "\n", + "DELETE FROM cycle_checks;\n", + "\n", + "INSERT INTO edges (from_id, to_id, label)\n", + "SELECT * FROM (VALUES (1, 2, 'vA -> vB'),\n", + " (1, 3, 'vA -> vC'));\n", + " \n", + "SELECT * FROM cycle_checks;" + ] + }, + { + "cell_type": "markdown", + "id": "1731b8df", + "metadata": {}, + "source": [ + "_note_\n", + "\n", + "Note the presence of two rows in `cycle_checks`." + ] + }, + { + "cell_type": "markdown", + "id": "590016d3", + "metadata": {}, + "source": [ + "We can avoid reundant checks." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "67a654e3", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "DROP TRIGGER IF EXISTS forbid_cycles ON edges;\n", + "DROP TRIGGER IF EXISTS forbid_cycles_on_insert ON edges;\n", + "DROP TRIGGER IF EXISTS forbid_cycles_on_update ON edges;\n", + "\n", + "CREATE OR REPLACE TRIGGER forbid_cycles\n", + " AFTER INSERT ON edges\n", + " FOR EACH STATEMENT\n", + " EXECUTE FUNCTION verify_acyclic();" + ] + }, + { + "cell_type": "markdown", + "id": "54b0b700", + "metadata": {}, + "source": [ + "_notes_\n", + "\n", + "After re-creating the trigger as `FOR EACH STATEMENT`, re-run the `DELETE`&`DELETE`&`INSERT`&`SELECT` sequence above. See that the trigger now only fires once." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "/gnu/store/q35bxk1i5blx0rcgxphhrq9xqmgha9bz-python-3.11.11/bin/python3", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} |
