diff options
Diffstat (limited to 'common/sanitize_JSON.js')
-rw-r--r-- | common/sanitize_JSON.js | 431 |
1 files changed, 0 insertions, 431 deletions
// Recovered from the deletion diff of common/sanitize_JSON.js (blob e03e396).
/**
 * This file is part of Haketilo.
 *
 * Function: Powerful, full-blown format enforcer for externally-obtained JSON.
 *
 * Copyright (C) 2021 Wojtek Kosior
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * As additional permission under GNU GPL version 3 section 7, you
 * may distribute forms of that code without the copy of the GNU
 * GPL normally required by section 4, provided you include this
 * license notice and, in case of non-source distribution, a URL
 * through which recipients can access the Corresponding Source.
 * If you modify file(s) with this exception, you may extend this
 * exception to your version of the file(s), but you are not
 * obligated to do so.  If you do not wish to do so, delete this
 * exception statement from your version.
 *
 * As a special exception to the GPL, any HTML file which merely
 * makes function calls to this code, and for that purpose
 * includes it by reference shall be deemed a separate work for
 * copyright law purposes.  If you modify this code, you may extend
 * this exception to your version of the code, but you are not
 * obligated to do so.  If you do not wish to do so, delete this
 * exception statement from your version.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 *
 * I, Wojtek Kosior, thereby promise not to sue for violation of this file's
 * license. Although I request that you do not make use of this code in a
 * proprietary program, I am not going to enforce this in court.
 */

/*
 * State shared by all sanitizers for the duration of a single
 * parse_json_with_schema() call:
 *   error_path     - stack of path segments (".key", "[3]", ...) leading to
 *                    the item currently being checked; used in messages,
 *   invalid_schema - set once the schema itself (not the data) is found to be
 *                    malformed.
 * Kept as `var`s, exactly as in the original declarations.
 */
var error_path;
var invalid_schema;

/*
 * Parse `json_string` and check/convert the result against `schema`.
 *
 * Returns the sanitized value. On failure throws a *string* of the form
 * "Invalid JSON: <cause>." or "Invalid JSON schema: <cause>." (strings, not
 * Error objects, are what the original code throws, so this is preserved for
 * the sake of existing callers).
 */
function parse_json_with_schema(schema, json_string)
{
    error_path = [];
    invalid_schema = false;

    try {
        return sanitize_unknown(schema, JSON.parse(json_string));
    } catch (e) {
        throw `Invalid JSON${invalid_schema ? " schema" : ""}: ${e}.`;
    } finally {
        /* Allow garbage collection. */
        error_path = undefined;
    }
}

/* Prefix `cause` with the path of the item currently being processed. */
function error_message(cause)
{
    return `object${error_path.join("")} ${cause}`;
}

/*
 * Like error_message() but additionally records that it is the schema that is
 * at fault. Meant to be used as `throw schema_error(...)`.
 */
function schema_error(cause)
{
    invalid_schema = true;
    return error_message(cause);
}

/* Own-property test that does not depend on the object's own/inherited keys. */
function has_own(object, key)
{
    return Object.prototype.hasOwnProperty.call(object, key);
}

/*
 * Create an ordinary (writable, enumerable, configurable) own data property.
 * Unlike `target[key] = value` this cannot trigger the `__proto__` setter, so
 * a "__proto__" key coming from external JSON ends up as plain data.
 */
function set_own(target, key, value)
{
    Object.defineProperty(target, key, {
        value,
        writable: true,
        enumerable: true,
        configurable: true
    });
}

/*
 * Test `string` against `regex`. `lastIndex` is reset first so that a regex
 * carrying the "g" or "y" flag gives the same answer on every call.
 */
function regex_matches(regex, string)
{
    regex.lastIndex = 0;
    return regex.test(string);
}

/*
 * Sanitize `item` against `schema`, where `schema` may be a list of
 * alternatives: [opt1, "or", opt2, ..., "ordefault", default_value].
 * Alternatives are tried in order and the first one that accepts the item
 * wins. With "ordefault" the default is returned when none accepts it;
 * otherwise the individual failure messages are joined with ", or ".
 */
function sanitize_unknown(schema, item)
{
    const is_alternatives_list = Array.isArray(schema) &&
          schema[1] !== "matchentry" && schema.length >= 2 &&
          ["ordefault", "or"].includes(schema[1]);

    if (!is_alternatives_list)
        return sanitize_unknown_no_alternatives(schema, item);

    /* Options and separators interleave, hence the length must be odd. */
    if ((schema.length & 1) !== 1)
        throw schema_error("was not understood");

    const schema_options = [];
    let has_default = false;
    let _default = undefined;

    for (let i = 0; i < schema.length; i++) {
        if ((i & 1) !== 1) {
            schema_options.push(schema[i]);
            continue;
        }

        if (schema[i] === "or")
            continue;

        /* "ordefault" is only legal as the second-to-last element. */
        if (schema[i] === "ordefault" && schema.length === i + 2) {
            has_default = true;
            _default = schema[i + 1];
            break;
        }

        throw schema_error("was not understood");
    }

    /*
     * A failing option leaves the segments it pushed on error_path (the throw
     * skips the matching pop()). Remember the depth so the path can be
     * restored before the next option, or the caller, carries on.
     */
    const path_depth = error_path.length;
    let error_msg = undefined;

    for (const schema_option of schema_options) {
        try {
            return sanitize_unknown_no_alternatives(schema_option, item);
        } catch (e) {
            if (invalid_schema)
                throw e;

            error_path.length = path_depth;

            if (has_default)
                continue;

            error_msg = error_msg === undefined ? e : `${error_msg}, or ${e}`;
        }
    }

    if (has_default)
        return _default;

    throw error_msg;
}

/*
 * Sanitize `item` against a schema that is not a list of alternatives: find
 * the first row of `checks` whose schema test accepts `schema`, verify the
 * item's type and delegate to that row's sanitizer.
 */
function sanitize_unknown_no_alternatives(schema, item)
{
    const row = checks.find(([schema_check]) => schema_check(schema));

    if (row === undefined)
        throw schema_error("was not understood");

    const [, item_check, sanitizer, type_name] = row;

    if (!item_check(item))
        throw error_message(`should be ${type_name} but is not`);

    return sanitizer(schema, item);
}

/* Render an object key as ".key" when identifier-like, else as ["key"]. */
function key_error_path_segment(key)
{
    if (/^[a-zA-Z_][a-zA-Z_0-9]*$/.test(key))
        return `.${key}`;

    return `[${JSON.stringify(key)}]`;
}

/*
 * Helper of sanitize_genobj(). When `schema[2]` is `directive`, remove the
 * directive and its value from `schema` (in place) and return the value after
 * validating it; otherwise return `fallback`.
 */
function take_entry_limit(schema, directive, fallback)
{
    if (schema[2] !== directive)
        return fallback;

    if (schema.length < 4)
        throw schema_error("was not understood");

    const limit = schema.splice(2, 2)[1];

    /* Also rejects NaN and non-numbers, which `limit < 0` would let through. */
    if (!(typeof limit === "number" && limit >= 0))
        throw schema_error(`specifies invalid "${directive}" (should be a non-negative number)`);

    return limit;
}

/*
 * Generic object - one that can contain arbitrary keys (in addition to ones
 * specified explicitly in the schema).
 *
 * Schema layout: [explicit_keys_schema, "matchentry",
 *                 ("minentries", n,)? ("maxentries", m,)?
 *                 (key_regex, entry_schema)... ]
 * where the key regex of the last pair may be omitted (any non-empty key is
 * then matched). Keys not already produced by the explicit part are matched
 * against the regexes in order; the first match decides the entry schema.
 */
function sanitize_genobj(schema, object)
{
    /* Work on a copy; directives are spliced out as they get consumed. */
    schema = [...schema];

    const min_matched_entries = take_entry_limit(schema, "minentries", 0);
    const max_matched_entries =
          take_entry_limit(schema, "maxentries", Infinity);

    const entry_schemas = [];

    while (schema.length > 2) {
        let regex = /.+/;

        if (schema.length > 3)
            regex = schema.splice(2, 1)[0];

        if (typeof regex === "string")
            regex = new RegExp(regex);

        entry_schemas.push([regex, schema.splice(2, 1)[0]]);
    }

    const result = sanitize_object(schema[0], object);
    let matched_entries = 0;

    for (const key of Object.keys(object)) {
        /* Already handled through the explicit part of the schema. */
        if (has_own(result, key))
            continue;

        matched_entries += 1;
        if (matched_entries > max_matched_entries)
            throw error_message(`has more than ${max_matched_entries} matched entr${max_matched_entries === 1 ? "y" : "ies"}`);

        error_path.push(key_error_path_segment(key));

        const matching =
              entry_schemas.find(([key_regex]) => regex_matches(key_regex, key));

        if (matching === undefined) {
            const regex_list = entry_schemas.map(i => i[0]).join(", ");
            throw error_message(`does not match any of key regexes: [${regex_list}]`);
        }

        sanitize_object_entry(result, key, matching[1], object);

        error_path.pop();
    }

    if (matched_entries < min_matched_entries)
        throw error_message(`has less than ${min_matched_entries} matched entr${min_matched_entries === 1 ? "y" : "ies"}`);

    return result;
}

/*
 * Sanitize an array.
 *
 * Schema layout: [item_schema..., ("repeat"|"repeatfull", (count,)?)?
 *                 ("minlen", n,)? ("maxlen", m)?]
 * Without a repeat directive the array must have exactly as many items as
 * there are item schemas. With one, the last `count` (default 1) item schemas
 * form a group that is cycled through for the remaining items; "repeatfull"
 * additionally requires those remaining items to form whole groups.
 * Items sanitized to `discard` are left out of the result.
 */
function sanitize_array(schema, array)
{
    const is_repeat_directive =
          what => ["repeat", "repeatfull"].includes(what);

    let min_length = 0;
    let max_length = Infinity;
    let repeat_length = 1;
    let repeat_directive = undefined;
    let repeat = undefined;

    /* Work on a copy; trailing directives are removed as they are consumed. */
    schema = [...schema];

    if (schema[schema.length - 2] === "maxlen") {
        max_length = schema[schema.length - 1];
        schema.splice(schema.length - 2);
    }

    if (schema[schema.length - 2] === "minlen") {
        min_length = schema[schema.length - 1];
        schema.splice(schema.length - 2);
    }

    if (is_repeat_directive(schema[schema.length - 2])) {
        repeat_length = schema.pop();

        /* After the pop() the directive itself is the last element. */
        if (!Number.isInteger(repeat_length) || repeat_length < 1)
            throw schema_error(`specifies invalid "${schema[schema.length - 1]}" (should be an integer not lower than 1)`);
    }

    if (is_repeat_directive(schema[schema.length - 1])) {
        repeat_directive = schema.pop();
        repeat = schema.splice(schema.length - repeat_length);
    } else if (schema.length !== array.length) {
        throw error_message(`does not have exactly ${schema.length} items`);
    }

    if (repeat_directive === "repeatfull" &&
        (array.length - schema.length) % repeat_length !== 0)
        throw error_message(`does not contain a full number of item group repetitions`);

    if (array.length < min_length)
        throw error_message(`has less than ${min_length} element${min_length === 1 ? "" : "s"}`);

    if (array.length > max_length)
        throw error_message(`has more than ${max_length} element${max_length === 1 ? "" : "s"}`);

    const result = [];
    let i = 0;

    for (const item of array) {
        /* Current schema list exhausted - (re)start the repeated group. */
        if (i >= schema.length) {
            i = 0;
            schema = repeat;
        }

        error_path.push(`[${i}]`);
        const sanitized = sanitize_unknown(schema[i], item);
        if (sanitized !== discard)
            result.push(sanitized);
        error_path.pop();

        i++;
    }

    return result;
}

/* Accept `string` only if the RegExp `schema` matches it. */
function sanitize_regex(schema, string)
{
    if (regex_matches(schema, string))
        return string;

    throw error_message(`does not match regex ${schema}`);
}

/* Matches the "string" and "string:<regex>" schema specifiers. */
const string_spec_regex = /^string(:(.*))?$/;

/*
 * Sanitize a string against a "string" or "string:<regex>" specifier. Plain
 * "string" accepts any string.
 */
function sanitize_string(schema, string)
{
    const regex = string_spec_regex.exec(schema)[2];

    if (regex === undefined)
        return string;

    return sanitize_regex(new RegExp(regex), string);
}

/*
 * Sanitize an object that may only carry the keys listed in `schema` (an
 * object mapping keys to entry schemas). Keys absent from the schema are not
 * copied to the result.
 */
function sanitize_object(schema, object)
{
    const result = {};

    for (const [key, entry_schema] of Object.entries(schema)) {
        error_path.push(key_error_path_segment(key));
        sanitize_object_entry(result, key, entry_schema, object);
        error_path.pop();
    }

    return result;
}

/*
 * Sanitize `object[key]` against `entry_schema` and store the outcome in
 * `result[key]`.
 *
 * The entry schema may be wrapped as
 *     ["optional", ...schema, ("default", default_value)?]
 * in which case a missing value is tolerated (and replaced with the default,
 * if one is given). A value sanitized to `discard` is not stored.
 */
function sanitize_object_entry(result, key, entry_schema, object)
{
    let optional = false;
    let has_default = false;
    let _default = undefined;

    if (Array.isArray(entry_schema) && entry_schema.length > 1 &&
        entry_schema[0] === "optional") {
        optional = true;
        entry_schema = entry_schema.slice(1);

        /* Index at which "default" sits if present (rest has odd length). */
        const idx_def = entry_schema.length - (entry_schema.length & 1) - 1;

        if (entry_schema[idx_def] === "default") {
            has_default = true;
            _default = entry_schema[idx_def + 1];
            entry_schema.splice(idx_def);
        } else if ((entry_schema.length & 1) !== 1) {
            throw schema_error("was not understood");
        }

        /* A single remaining element is the schema itself, unwrap it. */
        if (entry_schema.length < 2)
            entry_schema = entry_schema[0];
    }

    /* Only own properties count; inherited ones are not part of the JSON. */
    const unsanitized_value = has_own(object, key) ? object[key] : undefined;

    if (unsanitized_value === undefined) {
        if (!optional)
            throw error_message("is missing");

        if (has_default)
            set_own(result, key, _default);

        return;
    }

    const sanitized = sanitize_unknown(entry_schema, unsanitized_value);
    if (sanitized !== discard)
        set_own(result, key, sanitized);
}

/* Sanitizer that accepts the item unchanged. */
function take_literal(schema, item)
{
    return item;
}

/*
 * This function is used like a symbol. Other parts of code do sth like
 * `item === discard` to check if item was returned by this function.
 */
function discard(schema, item)
{
    return discard;
}

/*
 * The following are some helper functions to categorize various
 * schema item specifiers (used in the array below).
 */

function is_genobj_spec(item)
{
    return Array.isArray(item) && item[1] === "matchentry";
}

function is_regex(item)
{
    return typeof item === "object" && typeof item.test === "function";
}

function is_string_spec(item)
{
    return typeof item === "string" && string_spec_regex.test(item);
}

/*
 * True for non-null, non-array objects. `typeof null` is "object" too, so
 * without the extra tests a null (or array) item would be handed over to the
 * object sanitizers.
 */
function is_object(item)
{
    return typeof item === "object" && item !== null && !Array.isArray(item);
}

function eq(what)
{
    return i => i === what;
}

/*
 * Rows of: [schema test, item test, sanitizer, type name for error messages].
 * The first row whose schema test passes is used.
 * Array and null checks must go before object check.
 */
const checks = [
    [is_genobj_spec,  is_object,                   sanitize_genobj, "an object"],
    [Array.isArray,   Array.isArray,               sanitize_array,  "an array"],
    [eq(null),        i => i === null,             take_literal,    "null"],
    [is_regex,        i => typeof i === "string",  sanitize_regex,  "a string"],
    [is_string_spec,  i => typeof i === "string",  sanitize_string, "a string"],
    [is_object,       is_object,                   sanitize_object, "an object"],
    [eq("number"),    i => typeof i === "number",  take_literal,    "a number"],
    [eq("boolean"),   i => typeof i === "boolean", take_literal,    "a boolean"],
    [eq("anything"),  i => true,                   take_literal,    "dummy"],
    [eq("discard"),   i => true,                   discard,         "dummy"]
];

/*
 * Build directive of the original file, kept verbatim on its own line and at
 * column 0. It is not JavaScript, hence the surrounding block comment.
 * NOTE(review): this assumes the build preprocessor recognizes directives
 * line by line regardless of the enclosing comment - confirm, and move the
 * directive out of the comment if it does not.
#EXPORT parse_json_with_schema
 */