aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--15-procedural-sql/procedural-sql.ipynb2313
1 files changed, 2313 insertions, 0 deletions
diff --git a/15-procedural-sql/procedural-sql.ipynb b/15-procedural-sql/procedural-sql.ipynb
new file mode 100644
index 0000000..f2894c1
--- /dev/null
+++ b/15-procedural-sql/procedural-sql.ipynb
@@ -0,0 +1,2313 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "510b6020",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import agh_db_lectures\n",
+ "agh_db_lectures.prepare_notebook_for_sql()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c4459f3f",
+ "metadata": {},
+ "source": [
+ "# Procedural SQL"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d60b5e4b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!psql --port 25432 --quiet -c \"CREATE DATABASE procedures WITH OWNER demo_user\" postgres || true\n",
+ "%sql postgresql://demo_user:demo_pwd@localhost:25432/procedures"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "efb05b48",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "DROP TABLE IF EXISTS vertices CASCADE;\n",
+ "CREATE TABLE vertices(\n",
+ " id INT PRIMARY KEY,\n",
+ " label VARCHAR(50) UNIQUE\n",
+ ");\n",
+ "\n",
+ "DROP TABLE IF EXISTS edges CASCADE;\n",
+ "CREATE TABLE edges(\n",
+ " from_id INT,\n",
+ " to_id INT,\n",
+ " label VARCHAR(50) UNIQUE,\n",
+ " PRIMARY KEY (from_id, to_id)\n",
+ ");"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f6c43a43",
+ "metadata": {},
+ "source": [
+ "## Functions Defined using SQL"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "92c13036",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "DROP FUNCTION IF EXISTS add_vertex;\n",
+ "CREATE FUNCTION add_vertex(lbl VARCHAR) RETURNS INTEGER AS\n",
+ " 'INSERT INTO vertices\n",
+ " VALUES ((SELECT COALESCE(MAX(id), 0) + 1 FROM vertices),\n",
+ " $1);\n",
+ " SELECT id FROM vertices WHERE label = $1;'\n",
+ " LANGUAGE SQL\n",
+ " RETURNS NULL ON NULL INPUT;"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f63026c2",
+ "metadata": {},
+ "source": [
+ "_notes_\n",
+ "\n",
+ "This can help avoid the overhead of running the logic in an application which has to communicate with the DBMS."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7aa2d300",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "SELECT add_vertex('v1');"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9fd3a11b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "SELECT * FROM vertices;"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "cc3194cd",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "SELECT add_vertex(NULL);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "bb9d0089",
+ "metadata": {},
+ "source": [
+ "### Dollar-Quoting in Postgres"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ac684702",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "SELECT $mytag$I am a 'string' constant with double dollar signs ($$).$mytag$"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5331bdc0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "SELECT $$\n",
+ "I am a multiline 'string' constant.\n",
+ "$$"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "88e008f2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "-- The same effect, uses `$$'.\n",
+ "DROP FUNCTION IF EXISTS add_vertex;\n",
+ "CREATE FUNCTION add_vertex(lbl VARCHAR) RETURNS INTEGER AS $$\n",
+ " INSERT INTO vertices\n",
+ " VALUES ((SELECT COALESCE(MAX(id), 0) + 1 FROM vertices),\n",
+ " lbl);\n",
+ " SELECT id FROM vertices WHERE label = lbl;\n",
+ " $$ LANGUAGE SQL\n",
+ " RETURNS NULL ON NULL INPUT;"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4fbead01",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "SELECT add_vertex('v2');"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2204aa43",
+ "metadata": {},
+ "source": [
+ "## Functions and Procedures Defined Using PL/pgSQL"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "37b73de4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "DROP FUNCTION IF EXISTS add_vertex;\n",
+ "CREATE FUNCTION add_vertex(lbl VARCHAR) RETURNS INTEGER AS $$\n",
+ " DECLARE new_id INTEGER;\n",
+ " BEGIN\n",
+ " SELECT COALESCE(MAX(id), 0) + 1\n",
+ " INTO new_id\n",
+ " FROM vertices;\n",
+ "\n",
+ " INSERT INTO vertices\n",
+ " VALUES (new_id, lbl);\n",
+ " \n",
+ " RETURN new_id;\n",
+ " END\n",
+ " $$ LANGUAGE plpgsql\n",
+ " RETURNS NULL ON NULL INPUT;"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8280af14",
+ "metadata": {},
+ "source": [
+ "_notes_\n",
+ "\n",
+ "Only SQL is standardized, procedural extensions are not. Other DBMSes have their own procedural languages.\n",
+ "\n",
+ "Postgres, however, tries to make porting of code from Oracle's procedural language easy.\n",
+ "\n",
+ "Keyword `STRICT` can be used in place of `RETURNS NULL ON NULL INPUT`. Effect is the same."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "42a32c16",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "SELECT add_vertex('v3');"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "49db4957",
+ "metadata": {},
+ "source": [
+ "_notes_\n",
+ "\n",
+ "Run twice to see a unique violation error."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8f7717c8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "SELECT * FROM vertices;"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6633fdd9",
+ "metadata": {},
+ "source": [
+ "### Error Trapping"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b8bab70b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "DROP FUNCTION IF EXISTS add_vertex;\n",
+ "CREATE FUNCTION add_vertex(lbl VARCHAR) RETURNS INTEGER AS $$\n",
+ " DECLARE new_id INTEGER;\n",
+ " BEGIN\n",
+ " SELECT COALESCE(MAX(id), 0) + 1\n",
+ " INTO new_id\n",
+ " FROM vertices;\n",
+ "\n",
+ " INSERT INTO vertices\n",
+ " VALUES (new_id, lbl);\n",
+ " \n",
+ " RETURN new_id;\n",
+ " EXCEPTION\n",
+ " WHEN unique_violation THEN\n",
+ " RETURN add_vertex(lbl || '.');\n",
+ " END\n",
+ " $$ LANGUAGE plpgsql\n",
+ " RETURNS NULL ON NULL INPUT;"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a104c6ab",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "SELECT add_vertex('v3');"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6c4e900a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "SELECT * FROM vertices;"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a5c8e16b",
+ "metadata": {},
+ "source": [
+ "Error codes can be looked up in the documentation:\n",
+ "\n",
+ "https://postgresql.org/docs/current/errcodes-appendix.html"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7cf8f055",
+ "metadata": {},
+ "source": [
+ "### Conditionals"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5864bc80",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "DROP FUNCTION IF EXISTS add_edge;\n",
+ "CREATE FUNCTION add_edge(lbl_from VARCHAR, lbl_to VARCHAR)\n",
+ " RETURNS BOOLEAN AS $$\n",
+ " DECLARE\n",
+ " already_present BOOLEAN;\n",
+ " from_id_requested INTEGER;\n",
+ " to_id_requested INTEGER;\n",
+ " BEGIN\n",
+ " SELECT id\n",
+ " INTO from_id_requested\n",
+ " FROM vertices\n",
+ " WHERE label = lbl_from;\n",
+ "\n",
+ " SELECT id\n",
+ " INTO to_id_requested\n",
+ " FROM vertices\n",
+ " WHERE label = lbl_to;\n",
+ "\n",
+ " SELECT EXISTS(\n",
+ " SELECT *\n",
+ " FROM edges\n",
+ " WHERE (from_id, to_id) =\n",
+ " (from_id_requested, to_id_requested)\n",
+ " )\n",
+ " INTO already_present;\n",
+ " \n",
+ " IF already_present THEN\n",
+ " RETURN FALSE;\n",
+ " ELSE\n",
+ " INSERT INTO edges (from_id, to_id)\n",
+ " VALUES (from_id_requested, to_id_requested);\n",
+ " END IF;\n",
+ " \n",
+ " RETURN TRUE;\n",
+ " END\n",
+ " $$ LANGUAGE plpgsql;"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f19e3072",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "SELECT add_edge('v1', 'v2');"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "cbbd4322",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "SELECT * FROM edges;"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c0231942",
+ "metadata": {},
+ "source": [
+ "### Procedures vs Functions"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b8527ee3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "DROP FUNCTION IF EXISTS add_edge;\n",
+ "DROP PROCEDURE IF EXISTS add_edge;\n",
+ "CREATE PROCEDURE add_edge(lbl_from VARCHAR, lbl_to VARCHAR)\n",
+ " AS $$\n",
+ " DECLARE\n",
+ " already_present BOOLEAN;\n",
+ " from_id_requested INTEGER;\n",
+ " to_id_requested INTEGER;\n",
+ " BEGIN\n",
+ " SELECT id\n",
+ " INTO from_id_requested\n",
+ " FROM vertices\n",
+ " WHERE label = lbl_from;\n",
+ "\n",
+ " SELECT id\n",
+ " INTO to_id_requested\n",
+ " FROM vertices\n",
+ " WHERE label = lbl_to;\n",
+ "\n",
+ " SELECT EXISTS(\n",
+ " SELECT *\n",
+ " FROM edges\n",
+ " WHERE (from_id, to_id) =\n",
+ " (from_id_requested, to_id_requested)\n",
+ " )\n",
+ " INTO already_present;\n",
+ " \n",
+ " IF NOT already_present THEN\n",
+ " INSERT INTO edges (from_id, to_id)\n",
+ " VALUES (from_id_requested, to_id_requested);\n",
+ " END IF;\n",
+ " END\n",
+ " $$ LANGUAGE plpgsql;"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e438d460",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "SELECT add_edge('v3', 'v1');"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d90c16cd",
+ "metadata": {},
+ "source": [
+ "_notes_\n",
+ "\n",
+ "The above gives an error, because procedures cannot be called inside expressions. We need to use the `CALL` command."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "deed50ff",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "CALL add_edge('v3', 'v1');"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e4218120",
+ "metadata": {},
+ "source": [
+ "### Default Argument Values and Assignments"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "44cf8ef7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "DROP PROCEDURE IF EXISTS add_edge(VARCHAR, VARCHAR);\n",
+ "DROP PROCEDURE IF EXISTS add_edge(VARCHAR, VARCHAR, VARCHAR);\n",
+ "CREATE PROCEDURE add_edge(lbl_from VARCHAR,\n",
+ " lbl_to VARCHAR,\n",
+ " edge_lbl VARCHAR DEFAULT NULL)\n",
+ " AS $$\n",
+ " DECLARE\n",
+ " already_present BOOLEAN;\n",
+ " from_id_requested INTEGER;\n",
+ " to_id_requested INTEGER;\n",
+ " BEGIN\n",
+ " SELECT id\n",
+ " INTO from_id_requested\n",
+ " FROM vertices\n",
+ " WHERE label = lbl_from;\n",
+ "\n",
+ " SELECT id\n",
+ " INTO to_id_requested\n",
+ " FROM vertices\n",
+ " WHERE label = lbl_to;\n",
+ " \n",
+ " edge_lbl := COALESCE(edge_lbl, lbl_from || ' -> ' || lbl_to);\n",
+ "\n",
+ " SELECT EXISTS(\n",
+ " SELECT *\n",
+ " FROM edges\n",
+ " WHERE (from_id, to_id) =\n",
+ " (from_id_requested, to_id_requested)\n",
+ " )\n",
+ " INTO already_present;\n",
+ " \n",
+ " IF NOT already_present THEN\n",
+ " INSERT INTO edges (from_id, to_id, label)\n",
+ " VALUES (from_id_requested, to_id_requested, edge_lbl);\n",
+ " END IF;\n",
+ " END\n",
+ "$$ LANGUAGE plpgsql;"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "783fe9f2",
+ "metadata": {},
+ "source": [
+ "_notes_\n",
+ "\n",
+ "Note that there can be multiple PL/pgSQL functions with the same name but different argument counts/types. Where ambigious, we need to specify the argument types in `DROP FUNCTION` command."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1e98a4fe",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "CALL add_edge('v1', 'v3');"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c47bfca8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "CALL add_edge('v2', 'v3', 'MyEdge');"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "23bed71b",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "SELECT * FROM edges;"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ab2dbe93",
+ "metadata": {},
+ "source": [
+ "### Iteration"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "69e40a42",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "CREATE OR REPLACE FUNCTION my_loop() RETURNS INT AS $$\n",
+ " DECLARE\n",
+ " number INT := 0;\n",
+ " sum INT := 0;\n",
+ " BEGIN\n",
+ " LOOP\n",
+ " sum := sum + number;\n",
+ " number := number + 1;\n",
+ " IF number > 100 THEN\n",
+ " EXIT; -- break out of loop\n",
+ " END IF;\n",
+ " END LOOP;\n",
+ "\n",
+ " RETURN sum;\n",
+ " END\n",
+ "$$ LANGUAGE plpgsql"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a5a913e3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "SELECT my_loop()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0c15921b",
+ "metadata": {},
+ "source": [
+ "_notes_\n",
+ "\n",
+ "Instead of the `IF` construct, we can also use the following to break from the loop.\n",
+ "\n",
+ "```sql\n",
+ "EXIT WHEN number > 100;\n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5aa71ece",
+ "metadata": {},
+ "source": [
+ "### Cursors"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "196a1267",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "DELETE FROM edges;\n",
+ "DELETE FROM vertices;\n",
+ "\n",
+ "SELECT add_vertex('vA');\n",
+ "SELECT add_vertex('vB');\n",
+ "SELECT add_vertex('vC');\n",
+ "SELECT add_vertex('vD');\n",
+ "SELECT add_vertex('vE');\n",
+ "SELECT add_vertex('vF');\n",
+ "SELECT add_vertex('vG');"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "30f08d0b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "DROP FUNCTION IF EXISTS list_vertices;\n",
+ "CREATE FUNCTION list_vertices() RETURNS VARCHAR AS $$\n",
+ " DECLARE\n",
+ " current_label VARCHAR;\n",
+ " result VARCHAR := '';\n",
+ " first BOOLEAN := TRUE;\n",
+ " cur REFCURSOR;\n",
+ " BEGIN\n",
+ " OPEN cur FOR SELECT label FROM vertices;\n",
+ "\n",
+ " LOOP\n",
+ " FETCH FROM cur INTO current_label; \n",
+ "\n",
+ " EXIT WHEN current_label IS NULL;\n",
+ "\n",
+ " IF first THEN\n",
+ " result := current_label;\n",
+ " ELSE\n",
+ " result := result || ', ' || current_label;\n",
+ " END IF;\n",
+ " first := FALSE;\n",
+ " END LOOP;\n",
+ "\n",
+ " CLOSE cur;\n",
+ "\n",
+ " RETURN result;\n",
+ " END\n",
+ "$$ LANGUAGE plpgsql;"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "dce99e7c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "SELECT list_vertices();"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9c4cdc48",
+ "metadata": {},
+ "source": [
+ "_notes_\n",
+ "\n",
+ "`FETCH`ing from a cursor also implicitly sets a variable named `FOUND`. We can test the value of `FOUND` instead of checking if we fetched a `NULL` row.\n",
+ "\n",
+ "Note the `CLOSE` instruction. It allows the resources related to cursor to be released and allows the cursor variable to be used with `OPEN` again.\n",
+ "\n",
+ "In the `OPEN` instruction we **bound** the cursor variable to a query. We can instead create a cursor variable that is already bound at creation time. We need to declare it as `REFCURSOR` rather than `CURSOR`.\n",
+ "\n",
+ "If we have a `CURSOR` variable, we can use it with a variant of `OPEN` without `FOR`, as below."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "43676203",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "DROP FUNCTION IF EXISTS list_vertices;\n",
+ "CREATE FUNCTION list_vertices() RETURNS VARCHAR AS $$\n",
+ " DECLARE\n",
+ " current_label VARCHAR;\n",
+ " result VARCHAR := '';\n",
+ " first BOOLEAN := TRUE;\n",
+ " cur CURSOR FOR SELECT label FROM vertices;\n",
+ " BEGIN\n",
+ " OPEN cur;\n",
+ "\n",
+ " LOOP\n",
+ " FETCH FROM cur INTO current_label; \n",
+ "\n",
+ " EXIT WHEN current_label IS NULL;\n",
+ "\n",
+ " IF first THEN\n",
+ " result := current_label;\n",
+ " ELSE\n",
+ " result := result || ', ' || current_label;\n",
+ " END IF;\n",
+ " first := FALSE;\n",
+ " END LOOP;\n",
+ "\n",
+ " CLOSE cur;\n",
+ "\n",
+ " RETURN result;\n",
+ " END\n",
+ "$$ LANGUAGE plpgsql;"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "fa4c78a1",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "SELECT list_vertices();"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "020a0d03",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "SELECT 'wxy'\n",
+ "UNION\n",
+ "SELECT 'xyz'"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6593383a",
+ "metadata": {},
+ "source": [
+ "### \"RECORD\" Type"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d040e18e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "DROP FUNCTION IF EXISTS record_example;\n",
+ "CREATE FUNCTION record_example() RETURNS VARCHAR AS $$\n",
+ " DECLARE\n",
+ " rec RECORD;\n",
+ " BEGIN\n",
+ " SELECT * INTO rec FROM vertices LIMIT 1;\n",
+ " \n",
+ " RETURN rec.label || '(' || rec.id || ')';\n",
+ " END\n",
+ "$$ LANGUAGE plpgsql;"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "31c3563c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "SELECT record_example()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e929d5c5",
+ "metadata": {},
+ "source": [
+ "### \"FOR … IN\" Loops"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e07dd597",
+ "metadata": {},
+ "source": [
+ "_notes_\n",
+ "\n",
+ "We can use a `CURSOR` (but not a `REFCURSOR`) with a special variant of the `LOOP` construct."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b0f5ef4f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "DROP FUNCTION IF EXISTS list_vertices;\n",
+ "CREATE FUNCTION list_vertices() RETURNS VARCHAR AS $$\n",
+ " DECLARE\n",
+ " result VARCHAR := '';\n",
+ " first BOOLEAN := TRUE;\n",
+ " cur CURSOR FOR SELECT label FROM vertices;\n",
+ " BEGIN\n",
+ " FOR record IN cur LOOP\n",
+ " IF first THEN\n",
+ " result := record.label;\n",
+ " ELSE\n",
+ " result := result || ', ' || record.label;\n",
+ " END IF;\n",
+ " first := FALSE;\n",
+ " END LOOP;\n",
+ "\n",
+ " return result;\n",
+ " END\n",
+ "$$ LANGUAGE plpgsql;"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1e0bd1e4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "SELECT list_vertices();"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a9d8d3f2",
+ "metadata": {},
+ "source": [
+ "_notes_\n",
+ "\n",
+ "Finally, we can omit the declaration of a cursor variable and put query code in the loop construct."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "147edc94",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "DROP FUNCTION IF EXISTS list_vertices;\n",
+ "CREATE FUNCTION list_vertices() RETURNS VARCHAR AS $$\n",
+ " DECLARE\n",
+ " result VARCHAR := '';\n",
+ " first BOOLEAN := TRUE;\n",
+ " record RECORD;\n",
+ " BEGIN\n",
+ " FOR record IN SELECT label FROM vertices LOOP\n",
+ " IF first THEN\n",
+ " result := record.label;\n",
+ " ELSE\n",
+ " result := result || ', ' || record.label;\n",
+ " END IF;\n",
+ " first := FALSE;\n",
+ " END LOOP;\n",
+ "\n",
+ " return result;\n",
+ " END\n",
+ "$$ LANGUAGE plpgsql;"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "abf479c6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "SELECT list_vertices();"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f0430572",
+ "metadata": {},
+ "source": [
+ "_notes_\n",
+ "\n",
+ "Note that we now needed to explicitly declare the cursor variable (which was not required before).\n",
+ "\n",
+ "It happens to be also possible to create a list like this with an aggregation function built into Postgres, `STRING_AGG()`.\n",
+ "\n",
+ "```sql\n",
+ "SELECT STRING_AGG(label, ', ') FROM vertices;\n",
+ "```\n",
+ "\n",
+ "Note, however, that a loop in PL/pgSQL allows us to control the order of the values that are processed (we can use `ORDER BY` in the query)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "bb8a18c6",
+ "metadata": {},
+ "source": [
+ "### Sets of Values"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "29327330",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "DROP FUNCTION IF EXISTS set_example;\n",
+ "CREATE FUNCTION set_example() RETURNS SETOF TEXT AS $$\n",
+ " BEGIN\n",
+ " RETURN NEXT 'foo';\n",
+ " RETURN NEXT 'bar';\n",
+ " RETURN NEXT 'baz';\n",
+ " END\n",
+ "$$ LANGUAGE plpgsql;"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "23701f09",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "SELECT set_example()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "231b572f",
+ "metadata": {},
+ "source": [
+ "_notes_\n",
+ "\n",
+ "Try also adding the following above the `RETURN NEXT` lines.\n",
+ "\n",
+ "```sql\n",
+ " RETURN QUERY SELECT 'wxy'\n",
+ " UNION\n",
+ " SELECT 'xyz';\n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b0444eaf",
+ "metadata": {},
+ "source": [
+ "### \"FETCH\" and \"MOVE\" Commands"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "dea00db4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "DROP FUNCTION IF EXISTS fetch_examples;\n",
+ "CREATE FUNCTION fetch_examples() RETURNS SETOF TEXT AS $$\n",
+ " DECLARE\n",
+ " cur CURSOR FOR SELECT label, id\n",
+ " FROM vertices\n",
+ " ORDER BY label;\n",
+ " _label TEXT;\n",
+ " _id INT;\n",
+ " BEGIN\n",
+ " OPEN cur;\n",
+ "\n",
+ " FETCH LAST FROM cur INTO _label, _id;\n",
+ " RETURN NEXT _label || ' (' || _id || ')';\n",
+ "\n",
+ " CLOSE cur;\n",
+ " END\n",
+ "$$ LANGUAGE plpgsql;"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8224dd77",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "SELECT fetch_examples()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5d7e6049",
+ "metadata": {},
+ "source": [
+ "_notes_\n",
+ "\n",
+ "Add the following.\n",
+ "\n",
+ "```sql\n",
+ " MOVE FORWARD 1 IN cur;\n",
+ " FETCH FROM cur INTO _label, _id;\n",
+ " RETURN NEXT _label || ' (' || _id || ')';\n",
+ "```\n",
+ "\n",
+ "See that `MOVE` allowed us to skip a row.\n",
+ "\n",
+ "Add the following.\n",
+ "\n",
+ "```sql\n",
+ " MOVE LAST IN cur;\n",
+ " FETCH FROM cur INTO _label, _id;\n",
+ " RETURN NEXT _label || ' (' || _id || ')';\n",
+ "```\n",
+ "\n",
+ "See that the cursor got positioned **after** the final row and subsequent `FETCH` returned a record with `NULL`s.\n",
+ "\n",
+ "Add the following.\n",
+ "\n",
+ "```sql\n",
+ " MOVE LAST IN cur;\n",
+ " MOVE RELATIVE -1 IN cur;\n",
+ " FETCH FROM cur INTO _label, _id;\n",
+ " RETURN NEXT _label || ' (' || _id || ')';\n",
+ "```\n",
+ "\n",
+ "See that we were able to seek backward and fetch the last row of the query. For some queries Postgres might be **by default** unable to move the cursor backward. The keyword `SCROLL` can be added after cursor name in its declaration to mandate that backward seeks are possible, as below.\n",
+ "\n",
+ "```sql\n",
+ "cur SCROLL CURSOR FOR SELECT 'something';\n",
+ "```\n",
+ "\n",
+ "Now, replace the most recently added sequence of `MOVE`&`FETCH` lines and instead add the following, functionally equivalent code.\n",
+ "\n",
+ "```sql\n",
+ " MOVE LAST IN cur;\n",
+ " FETCH RELATIVE 0 FROM cur INTO _label, _id;\n",
+ "```\n",
+ "\n",
+ "Now, replace it with yet shorter, equivalent line.\n",
+ "\n",
+ "```sql\n",
+ " FETCH LAST FROM cur INTO _label, _id;\n",
+ "```\n",
+ "\n",
+ "As you see, the clauses `LAST`, `RELATIVE *count*`, `FORWARD *count*` and others work both with `FETCH` and `MOVE`."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b167cd89",
+ "metadata": {},
+ "source": [
+ "#### Using Cursors in Nonprocedural SQL"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3de29f0f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "DECLARE sql_cur CURSOR WITH HOLD FOR SELECT * FROM vertices"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3d9fb446",
+ "metadata": {},
+ "source": [
+ "_notes_\n",
+ "\n",
+ "The SQL standard does define cursors as well and they can be used outside user-defined functions. Note that they are meant by the standard to be used by other software, not by humans interacting with the database through, say, a shell.\n",
+ "\n",
+ "Cursors are by default closed automatically by the end of transaction. An SQL cursor can be declared `WITH HOLD` to change this behavior and instead have it persist after transaction. We leverage this to play with a cursor from within the Jupyter Notebook, which does not currently handle transactions."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ab34fbab",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "FETCH FORWARD 2 FROM sql_cur"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9a8dce96",
+ "metadata": {},
+ "source": [
+ "_notes_\n",
+ "\n",
+ "We can now provide a count after `FORWARD` to fetch more than one row at once. This was not possible with a cursor in PL/pgSQL."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "623baa58",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "MOVE RELATIVE 2 IN sql_cur"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "fd5d8868",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "FETCH FORWARD 2 FROM sql_cur"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "281e2cde",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "FETCH ALL IN sql_cur"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7d62c69d",
+ "metadata": {},
+ "source": [
+ "_notes_\n",
+ "\n",
+ "The `FETCH ALL`, as used above, is also not available in PL/pgSQL."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "069a051f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "CLOSE sql_cur"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6094d7c3",
+ "metadata": {},
+ "source": [
+ "_notes_\n",
+ "\n",
+ "Only now can a cursor with name `sql_cur` be declared again."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a503ec5b",
+ "metadata": {},
+ "source": [
+ "_notes_\n",
+ "\n",
+ "The `SCROLL` option works analogously as in PL/pgSQL.\n",
+ "\n",
+ "It is also possible to return a cursor (or a set of them!) from a PL/pgSQL function and use it from SQL."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a0bdac01",
+ "metadata": {},
+ "source": [
+ "### Simple Graph Algorithm Example"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d6a9e761",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "CALL add_edge('vA', 'vB');\n",
+ "CALL add_edge('vA', 'vC');\n",
+ "CALL add_edge('vB', 'vC');\n",
+ "CALL add_edge('vB', 'vE');\n",
+ "CALL add_edge('vE', 'vF');\n",
+ "CALL add_edge('vE', 'vA');\n",
+ "CALL add_edge('vD', 'vC');\n",
+ "CALL add_edge('vF', 'vC');"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4d0bd5dd",
+ "metadata": {},
+ "source": [
+ "Task: find all vertices reachable from given vertex."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "39b40891",
+ "metadata": {},
+ "source": [
+ "_notes_\n",
+ "\n",
+ "This would be difficult to achieve with nonprocedural SQL. BFS and DFS algorithms both utilize iteration."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "84592ab9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "DROP FUNCTION IF EXISTS find_reachable;\n",
+ "CREATE FUNCTION find_reachable(start_label VARCHAR)\n",
+ " RETURNS SETOF VARCHAR AS $$\n",
+ " BEGIN\n",
+ " CREATE TEMPORARY TABLE __current_ids AS\n",
+ " SELECT id FROM vertices WHERE label = start_label;\n",
+ "\n",
+ " CREATE TEMPORARY TABLE __found_ids AS\n",
+ " SELECT * FROM __current_ids;\n",
+ "\n",
+ " LOOP\n",
+ " CREATE TEMPORARY TABLE __tmp_ids AS\n",
+ " SELECT DISTINCT to_id\n",
+ " FROM __current_ids JOIN edges ON id = from_id\n",
+ " WHERE to_id NOT IN (SELECT * FROM __found_ids);\n",
+ "\n",
+ " EXIT WHEN NOT EXISTS (SELECT * FROM __tmp_ids);\n",
+ "\n",
+ " DELETE FROM __current_ids;\n",
+ " INSERT INTO __found_ids SELECT * FROM __tmp_ids;\n",
+ " INSERT INTO __current_ids SELECT * FROM __tmp_ids;\n",
+ " DROP TABLE __tmp_ids;\n",
+ " END LOOP;\n",
+ "\n",
+ " RETURN QUERY SELECT label\n",
+ " FROM __found_ids NATURAL JOIN vertices;\n",
+ "\n",
+ " DROP TABLE __found_ids;\n",
+ " DROP TABLE __current_ids;\n",
+ " DROP TABLE __tmp_ids;\n",
+ " END\n",
+ "$$ LANGUAGE plpgsql;"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "651ca228",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "SELECT find_reachable('vA')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8f0a39cb",
+ "metadata": {},
+ "source": [
+ "_notes_\n",
+ "\n",
+ "Note that something similar could also be achieved with nonprocedural SQL using a recursive `WITH` query (not discussed before in this course). An example is presented below.\n",
+ "\n",
+ "```sql\n",
+ "WITH RECURSIVE reachable_ids AS (\n",
+ " SELECT id FROM vertices WHERE label = 'vE'\n",
+ " UNION\n",
+ " SELECT DISTINCT to_id\n",
+ " FROM reachable_ids JOIN edges ON (id = from_id)\n",
+ ")\n",
+ "SELECT label FROM reachable_ids NATURAL JOIN vertices;\n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1921b86c",
+ "metadata": {},
+ "source": [
+ "## Support for Other Procedural Languages"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d9588667",
+ "metadata": {},
+ "source": [
+ "_notes_\n",
+ "\n",
+ "Examples for computing the greatest common denominator."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d7dd04dc",
+ "metadata": {},
+ "source": [
+ "Tcl\n",
+ "\n",
+ "```sql\n",
+ "CREATE EXTENSION pltcl;\n",
+ "\n",
+ "CREATE FUNCTION gcd(a INT, b INT) RETURNS INT AS $$\n",
+ "```\n",
+ "```tcl\n",
+ " if {$b > $a} {lassign \"$b $a\" b a}\n",
+ "\n",
+ " while {$a % $b > 0} {\n",
+ " lassign \"$b [expr {$a % $b}]\" a b\n",
+ " }\n",
+ "\n",
+ " return $b\n",
+ "```\n",
+ "```sql\n",
+ "$$ LANGUAGE pltcl STRICT;\n",
+ "\n",
+ "SELECT gcd(45, 72);\n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2df8b161",
+ "metadata": {},
+ "source": [
+ "and Python\n",
+ "\n",
+ "```sql\n",
+ "CREATE EXTENSION plpython3u;\n",
+ "\n",
+ "CREATE FUNCTION gcd(a INT, b INT) RETURNS INT AS $$\n",
+ "```\n",
+ "```python\n",
+ " if b > a:\n",
+ " a, b = b, a\n",
+ "\n",
+ " while a % b:\n",
+ " a, b = b, a%b\n",
+ "\n",
+ " return b\n",
+ "```\n",
+ "```sql\n",
+ "$$ LANGUAGE plpython3u STRICT;\n",
+ "\n",
+ "SELECT gcd(45, 72);\n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "daf605d7",
+ "metadata": {},
+ "source": [
+ "## Triggers"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f7367ff8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "DROP TABLE IF EXISTS edge_changes;\n",
+ "CREATE TABLE edge_changes (\n",
+ " change_id SERIAL PRIMARY KEY,\n",
+ " from_id INT,\n",
+ " to_id INT,\n",
+ " label VARCHAR,\n",
+ " \"when\" TIMESTAMP,\n",
+ " change VARCHAR(7),\n",
+ " CHECK (change IN ('added', 'removed'))\n",
+ ");\n",
+ "\n",
+ "CREATE OR REPLACE FUNCTION log_edge_change()\n",
+ " RETURNS TRIGGER AS $$\n",
+ " BEGIN\n",
+ " IF old = new THEN\n",
+ " RETURN NULL;\n",
+ " END IF;\n",
+ "\n",
+ " IF old.from_id IS NOT NULL THEN\n",
+ " INSERT INTO edge_changes\n",
+ " (from_id, to_id, label,\n",
+ " \"when\", change)\n",
+ " VALUES (old.from_id, old.to_id, old.label,\n",
+ " NOW(), 'removed');\n",
+ " END IF;\n",
+ "\n",
+ " IF new.from_id IS NOT NULL THEN\n",
+ " INSERT INTO edge_changes\n",
+ " (from_id, to_id, label,\n",
+ " \"when\", change)\n",
+ " VALUES (new.from_id, new.to_id, new.label,\n",
+ " NOW(), 'added');\n",
+ " END IF;\n",
+ "\n",
+ " RETURN NULL;\n",
+ " END\n",
+ "$$ LANGUAGE plpgsql;\n",
+ "\n",
+ "CREATE OR REPLACE TRIGGER log_change\n",
+ " AFTER INSERT OR DELETE OR UPDATE ON edges\n",
+ " FOR EACH ROW\n",
+ " EXECUTE FUNCTION log_edge_change();"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "068144f2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "SELECT * FROM edge_changes;"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d5effd28",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "-- A no-op, should not generate a log entry.\n",
+ "UPDATE edges\n",
+ "SET to_id = to_id\n",
+ "WHERE to_id = 2;\n",
+ "\n",
+ "-- Deletion, should generate log entries.\n",
+ "DELETE FROM edges;"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "df25ef7a",
+ "metadata": {},
+ "source": [
+ "_notes_\n",
+ "\n",
+ "Re-run the earlier `SELECT` to see the logged changes."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b6838443",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "CALL add_edge('vA', 'vB');\n",
+ "CALL add_edge('vA', 'vC');\n",
+ "CALL add_edge('vB', 'vC');\n",
+ "CALL add_edge('vB', 'vE');\n",
+ "CALL add_edge('vE', 'vF');\n",
+ "CALL add_edge('vE', 'vA');\n",
+ "CALL add_edge('vD', 'vC');\n",
+ "CALL add_edge('vF', 'vC');"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "77e8d81d",
+ "metadata": {},
+ "source": [
+ "_notes_\n",
+ "\n",
+ "Re-run the earlier `SELECT` to see the logged changes."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0a6f4267",
+ "metadata": {},
+ "source": [
+ "### \"INSTEAD OF\" Triggers and Trigger-Updatable Views"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3b63cb74",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "DROP VIEW IF EXISTS is_edge;\n",
+ "CREATE VIEW is_edge AS\n",
+ "SELECT l.id AS from_id,\n",
+ " l.label AS from_label,\n",
+ " r.id AS to_id,\n",
+ " r.label AS to_label,\n",
+ " e.to_id IS NOT NULL AS present\n",
+ "FROM vertices l\n",
+ " CROSS JOIN vertices r\n",
+ " LEFT JOIN edges e ON (l.id, r.id) = (e.from_id, e.to_id)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "14ccb9d4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "SELECT * FROM is_edge"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0115d614",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "UPDATE is_edge\n",
+ "SET present = TRUE\n",
+ "WHERE (from_label, to_label) = ('vF', 'vG')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "73eacf7f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "CREATE OR REPLACE FUNCTION update_is_edge()\n",
+ " RETURNS TRIGGER AS $$\n",
+ " BEGIN\n",
+ " IF new.present THEN\n",
+ " CALL add_edge(old.from_label, old.to_label);\n",
+ " ELSE\n",
+ " DELETE FROM edges\n",
+ " WHERE (from_id, to_id) = (old.from_id, old.to_id);\n",
+ " END IF;\n",
+ "\n",
+ " RETURN NULL;\n",
+ " END\n",
+ "$$ LANGUAGE plpgsql;\n",
+ "\n",
+ "CREATE OR REPLACE TRIGGER handle_update\n",
+ " INSTEAD OF UPDATE ON is_edge\n",
+ " FOR EACH ROW\n",
+ " EXECUTE FUNCTION update_is_edge();"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f0b89edc",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "UPDATE is_edge\n",
+ "SET present = TRUE\n",
+ "WHERE (from_label, to_label) = ('vF', 'vG')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4ffd1b04",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "SELECT * FROM edge_changes"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f9048743",
+ "metadata": {},
+ "source": [
+ "_notes_\n",
+ "\n",
+ "We see the log of changes made by our `INSTEAD OF` trigger."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "242432a2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "SELECT * FROM edges"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "890dedaf",
+ "metadata": {},
+ "source": [
+ "_notes_\n",
+ "\n",
+ "We see the row added through view update."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "316d6132",
+ "metadata": {},
+ "source": [
+ "#### Examples with Sanity Checks"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6bab5c17",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "CREATE OR REPLACE FUNCTION update_is_edge()\n",
+ " RETURNS TRIGGER AS $$\n",
+ " BEGIN\n",
+ " IF (old.from_id, old.from_label, old.to_id, old.to_label)\n",
+ " <>\n",
+ " (new.from_id, new.from_label, new.to_id, new.to_label)\n",
+ " THEN\n",
+ " RAISE EXCEPTION\n",
+ " 'Illegal update (from_id = %, to_id = %)',\n",
+ " old.from_id, old.to_id\n",
+ " USING HINT = 'Please only set the `present'' column';\n",
+ " END IF;\n",
+ "\n",
+ " IF new.present THEN\n",
+ " CALL add_edge(old.from_label, old.to_label);\n",
+ " ELSE\n",
+ " DELETE FROM edges\n",
+ " WHERE (from_id, to_id) = (old.from_id, old.to_id);\n",
+ " END IF;\n",
+ "\n",
+ " RETURN NULL;\n",
+ " END\n",
+ "$$ LANGUAGE plpgsql;\n",
+ "\n",
+ "CREATE OR REPLACE TRIGGER handle_update\n",
+ " INSTEAD OF UPDATE ON is_edge\n",
+ " FOR EACH ROW\n",
+ " EXECUTE FUNCTION update_is_edge();"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "58973eac",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "UPDATE is_edge\n",
+ "SET to_label = 'vE'\n",
+ "WHERE (from_label, to_label) = ('vF', 'vG')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "30aa6925",
+ "metadata": {},
+ "source": [
+ "_notes_\n",
+ "\n",
+ "Above we see the effect of our `RAISE` instruction in trigger's code."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6eccd424",
+ "metadata": {},
+ "source": [
+ "If we do not need a custom exception message, we can use the `ASSERT` command."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "db872df4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "CREATE OR REPLACE FUNCTION update_is_edge()\n",
+ " RETURNS TRIGGER AS $$\n",
+ " BEGIN\n",
+ " ASSERT COALESCE(\n",
+ " (old.from_id, old.from_label, old.to_id, old.to_label)\n",
+ " =\n",
+ " (new.from_id, new.from_label, new.to_id, new.to_label),\n",
+ " TRUE\n",
+ " );\n",
+ "\n",
+ " IF new.present THEN\n",
+ " CALL add_edge(old.from_label, old.to_label);\n",
+ " ELSE\n",
+ " DELETE FROM edges\n",
+ " WHERE (from_id, to_id) = (old.from_id, old.to_id);\n",
+ " END IF;\n",
+ "\n",
+ " RETURN NULL;\n",
+ " END\n",
+ "$$ LANGUAGE plpgsql;\n",
+ "\n",
+ "CREATE OR REPLACE TRIGGER handle_update\n",
+ " INSTEAD OF UPDATE ON is_edge\n",
+ " FOR EACH ROW\n",
+ " EXECUTE FUNCTION update_is_edge();"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "998ce4af",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "UPDATE is_edge\n",
+ "SET to_label = 'vE'\n",
+ "WHERE (from_label, to_label) = ('vF', 'vG')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "645362b3",
+ "metadata": {},
+ "source": [
+ "_notes_\n",
+ "\n",
+ "The above exception was generated by the `ASSERT` command."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "365fcfc8",
+ "metadata": {},
+ "source": [
+ "See that valid updates still do work."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3345ae83",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "UPDATE is_edge\n",
+ "SET present = FALSE\n",
+ "WHERE (from_label, to_label) = ('vF', 'vG')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c921b3a6",
+ "metadata": {},
+ "source": [
+ "### Trigger-Implemented Integrity Constraints"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7ff12f0e",
+ "metadata": {},
+ "source": [
+ "The cycle detection algorithm below has been derived from Kahn's.\n",
+ "\n",
+ "https://en.wikipedia.org/wiki/Topological_sorting#Kahn's_algorithm"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "17cf214b",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "-- Make sure we have no edges initially.\n",
+ "DELETE FROM edges;\n",
+ "\n",
+ "CREATE OR REPLACE FUNCTION verify_acyclic()\n",
+ " RETURNS TRIGGER AS $$\n",
+ " BEGIN\n",
+ " RAISE WARNING 'Checking for cycles';\n",
+ " -- Prepare a working copy of edges that we'll modify.\n",
+ " DROP TABLE IF EXISTS __edges_left;\n",
+ " CREATE TEMPORARY TABLE __edges_left AS\n",
+ " SELECT from_id, to_id FROM edges;\n",
+ "\n",
+ " LOOP\n",
+ " EXIT WHEN NOT EXISTS (SELECT * FROM __edges_left);\n",
+ "\n",
+ " -- Find vertices that have no incoming edges.\n",
+ " DROP TABLE IF EXISTS __source_vertices;\n",
+ " CREATE TEMPORARY TABLE __source_vertices\n",
+ " AS SELECT _out.from_id\n",
+ " FROM __edges_left _in\n",
+ " RIGHT JOIN __edges_left _out\n",
+ " ON (_in.to_id = _out.from_id)\n",
+ " WHERE _in.to_id IS NULL;\n",
+ "\n",
+ " IF NOT EXISTS (SELECT * FROM __source_vertices) THEN\n",
+ " -- There is no vertex without incoming edge.\n",
+ " RAISE EXCEPTION 'Cycle detected';\n",
+ " END IF;\n",
+ "\n",
+ " DELETE FROM __edges_left\n",
+ " WHERE from_id IN (SELECT * FROM __source_vertices);\n",
+ " END LOOP;\n",
+ "\n",
+ " DROP TABLE __edges_left;\n",
+ " DROP TABLE IF EXISTS source_vertices;\n",
+ "\n",
+ " RETURN NULL;\n",
+ " END\n",
+ "$$ LANGUAGE plpgsql;\n",
+ "\n",
+ "CREATE OR REPLACE TRIGGER forbid_cycles\n",
+ " AFTER INSERT OR UPDATE on edges\n",
+ " FOR EACH ROW\n",
+ " EXECUTE FUNCTION verify_acyclic();"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0b1405a5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "CALL add_edge('vA', 'vB');\n",
+ "CALL add_edge('vA', 'vC');\n",
+ "CALL add_edge('vB', 'vC');\n",
+ "CALL add_edge('vB', 'vE');\n",
+ "CALL add_edge('vE', 'vF');"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "fa81c7b7",
+ "metadata": {},
+ "source": [
+ "_notes_\n",
+ "\n",
+ "The above succeeds, there are no edges yet."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "aebee679",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "-- Would create cycle vA → vB → vE → vA.\n",
+ "CALL add_edge('vE', 'vA');"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "35546575",
+ "metadata": {},
+ "source": [
+ "_notes_\n",
+ "\n",
+ "Our trigger has properly prevented the insertion."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8f5199ed",
+ "metadata": {},
+ "source": [
+ "#### \"WHEN\" clause"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "73130fb1",
+ "metadata": {},
+ "source": [
+ "If a trigger is only meant to be run in certain cases, we can utilize the `WHEN` clause of `CREATE TRIGGER`.\n",
+ "\n",
+ "Let me first introduce you to the `IS DISTINCT FROM` operator."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6ed6c225",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "-- Two NULL values are treated as non-distinct.\n",
+ "SELECT (5, 6, NULL) IS DISTINCT FROM (5, 6, NULL)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4ca99236",
+ "metadata": {},
+ "source": [
+ "_notes_\n",
+ "\n",
+ "`IS DISTINCT FROM` can be used to compare values or **tuples** (as in this case)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "574677cb",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "SELECT (5, 6, NULL) IS DISTINCT FROM (5, 789, NULL)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "10819fa1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "-- We'll log (non-raising) trigger firings.\n",
+ "DROP TABLE IF EXISTS cycle_checks;\n",
+ "CREATE TABLE cycle_checks (\n",
+ " id SERIAL PRIMARY KEY,\n",
+ " _when TIMESTAMP\n",
+ ");\n",
+ "\n",
+ "CREATE OR REPLACE FUNCTION verify_acyclic()\n",
+ " RETURNS TRIGGER AS $$\n",
+ " BEGIN\n",
+ " -- We have just added this `INSERT' command.\n",
+ " INSERT INTO cycle_checks (_when)\n",
+ " VALUES (NOW());\n",
+ "\n",
+ " -- Prepare a working copy of edges that we'll modify.\n",
+ " DROP TABLE IF EXISTS __edges_left;\n",
+ " CREATE TEMPORARY TABLE __edges_left AS\n",
+ " SELECT from_id, to_id FROM edges;\n",
+ "\n",
+ " LOOP\n",
+ " EXIT WHEN NOT EXISTS (SELECT * FROM __edges_left);\n",
+ "\n",
+ " -- Find vertices that have no incoming edges.\n",
+ " DROP TABLE IF EXISTS __source_vertices;\n",
+ " CREATE TEMPORARY TABLE __source_vertices\n",
+ " AS SELECT _out.from_id\n",
+ " FROM __edges_left _in\n",
+ " RIGHT JOIN __edges_left _out\n",
+ " ON (_in.to_id = _out.from_id)\n",
+ " WHERE _in.to_id IS NULL;\n",
+ "\n",
+ " IF NOT EXISTS (SELECT * FROM __source_vertices) THEN\n",
+ " -- There is no vertex without incoming edge.\n",
+ " RAISE EXCEPTION 'Cycle detected';\n",
+ " END IF;\n",
+ "\n",
+ " DELETE FROM __edges_left\n",
+ " WHERE from_id IN (SELECT * FROM __source_vertices);\n",
+ " END LOOP;\n",
+ "\n",
+ " DROP TABLE __edges_left;\n",
+ " DROP TABLE IF EXISTS source_vertices;\n",
+ "\n",
+ " RETURN NULL;\n",
+ " END\n",
+ "$$ LANGUAGE plpgsql;\n",
+ "\n",
+ "DROP TRIGGER IF EXISTS forbid_cycles ON edges;\n",
+ "\n",
+ "CREATE OR REPLACE TRIGGER forbid_cycles_on_insert\n",
+ " AFTER INSERT ON edges\n",
+ " FOR EACH ROW\n",
+ " EXECUTE FUNCTION verify_acyclic();\n",
+ "\n",
+ "CREATE OR REPLACE TRIGGER forbid_cycles_on_update\n",
+ " AFTER UPDATE ON edges\n",
+ " FOR EACH ROW\n",
+ " WHEN ((old.from_id, old.to_id)\n",
+ " IS DISTINCT FROM\n",
+ " (new.from_id, new.to_id))\n",
+ " EXECUTE FUNCTION verify_acyclic();"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d5243022",
+ "metadata": {},
+ "source": [
+ "_notes_\n",
+ "\n",
+ "As shown above, we can use `WHEN` in `CREATE TRIGGER` to more concisely specify a condition for trigger firing."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "dcb15881",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "DELETE FROM edges WHERE label = 'vA -> vB';\n",
+ "CALL add_edge('vA', 'vB');\n",
+ "SELECT * FROM cycle_checks;"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "aad65684",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "SELECT * FROM edges"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a2551564",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "UPDATE edges\n",
+ "SET to_id = 1,\n",
+ " label = 'vB -> vA'\n",
+ "WHERE (from_id, to_id) = (2, 3)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6cce7c97",
+ "metadata": {},
+ "source": [
+ "_notes_\n",
+ "\n",
+ "Cycles are still properly prevented."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1aaebada",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "UPDATE edges\n",
+ "SET label = 'vB ---> vA'\n",
+ "WHERE (from_id, to_id) = (2, 3);\n",
+ "SELECT * FROM cycle_checks;"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b7e9d140",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "UPDATE edges\n",
+ "SET label = 'vB -> vA'\n",
+ "WHERE (from_id, to_id) = (2, 3);\n",
+ "SELECT * FROM cycle_checks;"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "37ad6c20",
+ "metadata": {},
+ "source": [
+ "_notes_\n",
+ "\n",
+ "Irrelevant updates (label changes) give false in the `WHEN` condition and do not cause trigger firing."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d321bebc",
+ "metadata": {},
+ "source": [
+ "### \"FOR EACH STATEMENT\" Triggers"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b95f36cb",
+ "metadata": {},
+ "source": [
+ "Our `FOREACH ROW` trigger fires once for every inserted row, even if they are inserted in one operation."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e86cd7fb",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "DELETE FROM edges WHERE from_id = 1;\n",
+ "\n",
+ "DELETE FROM cycle_checks;\n",
+ "\n",
+ "INSERT INTO edges (from_id, to_id, label)\n",
+ "SELECT * FROM (VALUES (1, 2, 'vA -> vB'),\n",
+ " (1, 3, 'vA -> vC'));\n",
+ " \n",
+ "SELECT * FROM cycle_checks;"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1731b8df",
+ "metadata": {},
+ "source": [
+ "_note_\n",
+ "\n",
+ "Note the presence of two rows in `cycle_checks`."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "590016d3",
+ "metadata": {},
+ "source": [
+ "We can avoid reundant checks."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "67a654e3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%sql\n",
+ "DROP TRIGGER IF EXISTS forbid_cycles ON edges;\n",
+ "DROP TRIGGER IF EXISTS forbid_cycles_on_insert ON edges;\n",
+ "DROP TRIGGER IF EXISTS forbid_cycles_on_update ON edges;\n",
+ "\n",
+ "CREATE OR REPLACE TRIGGER forbid_cycles\n",
+ " AFTER INSERT ON edges\n",
+ " FOR EACH STATEMENT\n",
+ " EXECUTE FUNCTION verify_acyclic();"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "54b0b700",
+ "metadata": {},
+ "source": [
+ "_notes_\n",
+ "\n",
+ "After re-creating the trigger as `FOR EACH STATEMENT`, re-run the `DELETE`&`DELETE`&`INSERT`&`SELECT` sequence above. See that the trigger now only fires once."
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "/gnu/store/q35bxk1i5blx0rcgxphhrq9xqmgha9bz-python-3.11.11/bin/python3",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.11"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}