/**
 * This file is part of Haketilo.
 *
 * Function: Powerful, full-blown format enforcer for externally-obtained JSON.
 *
 * Copyright (C) 2021 Wojtek Kosior
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * As additional permission under GNU GPL version 3 section 7, you
 * may distribute forms of that code without the copy of the GNU
 * GPL normally required by section 4, provided you include this
 * license notice and, in case of non-source distribution, a URL
 * through which recipients can access the Corresponding Source.
 * If you modify file(s) with this exception, you may extend this
 * exception to your version of the file(s), but you are not
 * obligated to do so. If you do not wish to do so, delete this
 * exception statement from your version.
 *
 * As a special exception to the GPL, any HTML file which merely
 * makes function calls to this code, and for that purpose
 * includes it by reference shall be deemed a separate work for
 * copyright law purposes. If you modify this code, you may extend
 * this exception to your version of the code, but you are not
 * obligated to do so. If you do not wish to do so, delete this
 * exception statement from your version.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 *
 * I, Wojtek Kosior, thereby promise not to sue for violation of this file's
 * license. Although I request that you do not make use this code in a
 * proprietary program, I am not going to enforce this in court.
 */

/*
 * Module-level state shared by all sanitizers during a single call to
 * parse_json_with_schema():
 *  - error_path: array of path segments (".key", "[3]", ...) describing where
 *    in the parsed document the sanitizer currently is; only used to build
 *    error messages,
 *  - invalid_schema: set to true when the problem lies in the schema rather
 *    than in the JSON being validated.
 */
let error_path;
let invalid_schema;

/*
 * Parse `json_string` and enforce `schema` on the result.
 *
 * Returns the sanitized value (only the parts described by the schema are
 * retained). Throws a *string* of the form "Invalid JSON: <cause>." or
 * "Invalid JSON schema: <cause>." on failure; this also covers syntax errors
 * reported by JSON.parse().
 */
function parse_json_with_schema(schema, json_string) {
    error_path = [];
    invalid_schema = false;

    try {
        return sanitize_unknown(schema, JSON.parse(json_string));
    } catch (e) {
        throw `Invalid JSON${invalid_schema ? " schema" : ""}: ${e}.`;
    } finally {
        /* Allow garbage collection. */
        error_path = undefined;
    }
}

/* Build an error message prefixed with the current position in the document. */
function error_message(cause) {
    return `object${error_path.join("")} ${cause}`;
}

/* Like error_message() but also records that the schema itself is at fault. */
function schema_error(cause) {
    invalid_schema = true;
    return error_message(cause);
}

/* Own-property test that cannot be defeated by a key named "hasOwnProperty". */
function has_own(object, key) {
    return Object.prototype.hasOwnProperty.call(object, key);
}

/*
 * Create an own, enumerable data property. Plain assignment is not used
 * because assigning to the "__proto__" key would replace the prototype of
 * `object` instead of creating an entry.
 */
function set_own_entry(object, key, value) {
    Object.defineProperty(object, key, {
        value,
        writable: true,
        enumerable: true,
        configurable: true
    });
}

/*
 * Sanitize `item` against `schema`, where the schema may be a list of
 * alternatives: [opt1, "or", opt2, ..., "or", optN] optionally terminated
 * with [..., "ordefault", default_value]. The first alternative that accepts
 * the item wins. If none does, the default is returned (when specified) or
 * a combined error is thrown.
 */
function sanitize_unknown(schema, item) {
    const is_alternatives_list = Array.isArray(schema) &&
          schema.length >= 2 &&
          ["ordefault", "or"].includes(schema[1]);

    if (!is_alternatives_list)
        return sanitize_unknown_no_alternatives(schema, item);

    /* Options and keywords interleave, so the length has to be odd. */
    if ((schema.length & 1) !== 1)
        throw schema_error("was not understood");

    const schema_options = [];
    let has_default = false;
    let _default = undefined;

    for (let i = 0; i < schema.length; i++) {
        if ((i & 1) !== 1) {
            schema_options.push(schema[i]);
            continue;
        }

        if (schema[i] === "or")
            continue;

        /* "ordefault" is only legal as the last keyword. */
        if (schema[i] === "ordefault" && schema.length === i + 2) {
            has_default = true;
            _default = schema[i + 1];
            break;
        }

        throw schema_error("was not understood");
    }

    const path_depth = error_path.length;
    let error_msg = undefined;

    for (const schema_option of schema_options) {
        try {
            return sanitize_unknown_no_alternatives(schema_option, item);
        } catch (e) {
            /*
             * The failed alternative may have thrown from deep inside the
             * item, leaving its path segments behind. Drop them so that
             * subsequent error messages point to the right place.
             */
            error_path.length = path_depth;

            if (invalid_schema)
                throw e;

            if (has_default)
                continue;

            error_msg = error_msg === undefined ? e : `${error_msg}, or ${e}`;
        }
    }

    if (has_default)
        return _default;

    throw error_msg;
}

/*
 * Sanitize `item` against a single (non-alternative) schema by dispatching on
 * the first entry of `checks` whose schema predicate accepts `schema`.
 */
function sanitize_unknown_no_alternatives(schema, item) {
    for (const [schema_check, item_check, sanitizer, type_name] of checks) {
        if (!schema_check(schema))
            continue;

        if (item_check(item))
            return sanitizer(schema, item);

        throw error_message(`should be ${type_name} but is not`);
    }

    throw schema_error("was not understood");
}

/* Render an object key as `.key` when identifier-like, `["key"]` otherwise. */
function key_error_path_segment(key) {
    return /^[a-zA-Z_][a-zA-Z_0-9]*$/.exec(key) ?
        `.${key}` : `[${JSON.stringify(key)}]`;
}

/*
 * Generic object - one that can contain arbitrary keys (in addition to ones
 * specified explicitly in the schema).
 *
 * Schema shape:
 *     [explicit_keys_schema, "matchentry",
 *      ("minentries", n,)? ("maxentries", m,)?
 *      entry_schema | (key_regex, entry_schema)+]
 * A lone trailing entry schema applies to every key matching /.+/.
 */
function sanitize_genobj(schema, object) {
    let max_matched_entries = Infinity;
    let min_matched_entries = 0;
    let matched_entries = 0;
    const entry_schemas = [];

    schema = [...schema];

    if (schema[2] === "minentries") {
        if (schema.length < 4)
            throw schema_error("was not understood");

        min_matched_entries = schema[3];
        schema.splice(2, 2);
    }

    if (min_matched_entries < 0)
        throw schema_error('specifies invalid "minentries" (should be a non-negative number)');

    if (schema[2] === "maxentries") {
        if (schema.length < 4)
            throw schema_error("was not understood");

        max_matched_entries = schema[3];
        schema.splice(2, 2);
    }

    if (max_matched_entries < 0)
        throw schema_error('specifies invalid "maxentries" (should be a non-negative number)');

    /* Consume the remaining (key regex, entry schema) pairs. */
    while (schema.length > 2) {
        let regex = /.+/;

        if (schema.length > 3) {
            regex = schema[2];
            schema.splice(2, 1);
        }

        if (typeof regex === "string")
            regex = new RegExp(regex);

        entry_schemas.push([regex, schema[2]]);
        schema.splice(2, 1);
    }

    const result = sanitize_object(schema[0], object);

    for (const key of Object.keys(object)) {
        /* Keys already produced by the explicit part of the schema. */
        if (has_own(result, key))
            continue;

        matched_entries += 1;
        if (matched_entries > max_matched_entries)
            throw error_message(`has more than ${max_matched_entries} matched entr${max_matched_entries === 1 ? "y" : "ies"}`);

        error_path.push(key_error_path_segment(key));

        let match = false;
        for (const [key_regex, entry_schema] of entry_schemas) {
            if (!key_regex.exec(key))
                continue;

            match = true;
            sanitize_object_entry(result, key, entry_schema, object);
            break;
        }

        if (!match) {
            const regex_list = entry_schemas.map(i => i[0]).join(", ");
            throw error_message(`does not match any of key regexes: [${regex_list}]`);
        }

        error_path.pop();
    }

    if (matched_entries < min_matched_entries)
        throw error_message(`has less than ${min_matched_entries} matched entr${min_matched_entries === 1 ? "y" : "ies"}`);

    return result;
}

/*
 * Array schema shape (trailing modifiers are all optional but, when present,
 * have to appear in this order):
 *     [item_schema..., ("repeat"|"repeatfull", (group_len,)?)?
 *      ("minlen", n,)? ("maxlen", m)?]
 * Without a repeat directive the array must have exactly as many items as
 * there are item schemas. With one, the last `group_len` (default 1) item
 * schemas are applied cyclically to the remaining items; "repeatfull"
 * additionally requires a whole number of repetitions.
 */
function sanitize_array(schema, array) {
    let min_length = 0;
    let max_length = Infinity;
    let repeat_length = 1;
    let repeat_directive = undefined;
    let repeat = undefined;
    let i = 0;
    const result = [];

    schema = [...schema];

    if (schema[schema.length - 2] === "maxlen") {
        max_length = schema[schema.length - 1];
        schema.splice(schema.length - 2);
    }

    if (schema[schema.length - 2] === "minlen") {
        min_length = schema[schema.length - 1];
        schema.splice(schema.length - 2);
    }

    if (["repeat", "repeatfull"].includes(schema[schema.length - 2]))
        repeat_length = schema.pop();

    if (repeat_length < 1) {
        /* The length got popped, hence the directive is the last element. */
        throw schema_error(`specifies invalid "${schema[schema.length - 1]}" (should be a number greater than or equal to 1)`);
    }

    if (["repeat", "repeatfull"].includes(schema[schema.length - 1])) {
        repeat_directive = schema.pop();
        repeat = schema.splice(schema.length - repeat_length);
    } else if (schema.length !== array.length) {
        throw error_message(`does not have exactly ${schema.length} items`);
    }

    if (repeat_directive === "repeatfull" &&
        (array.length - schema.length) % repeat_length !== 0)
        throw error_message(`does not contain a full number of item group repetitions`);

    if (array.length < min_length)
        throw error_message(`has less than ${min_length} element${min_length === 1 ? "" : "s"}`);

    if (array.length > max_length)
        throw error_message(`has more than ${max_length} element${max_length === 1 ? "" : "s"}`);

    for (const item of array) {
        /* Fixed prefix (or previous repetition) exhausted - start the group. */
        if (i >= schema.length) {
            i = 0;
            schema = repeat;
        }

        error_path.push(`[${i}]`);
        const sanitized = sanitize_unknown(schema[i], item);
        if (sanitized !== discard)
            result.push(sanitized);
        error_path.pop();

        i++;
    }

    return result;
}

/* Accept `string` only if the RegExp `schema` matches it. */
function sanitize_regex(schema, string) {
    if (schema.test(string))
        return string;

    throw error_message(`does not match regex ${schema}`);
}

/* Matches the "string" and "string:<regex source>" schema specifiers. */
const string_spec_regex = /^string(:(.*))?$/;

/* Accept any string, or one matching the regex embedded in the specifier. */
function sanitize_string(schema, string) {
    const regex = string_spec_regex.exec(schema)[2];

    if (regex === undefined)
        return string;

    return sanitize_regex(new RegExp(regex), string);
}

/*
 * Build a new object containing only the keys listed in `schema`, each
 * sanitized against its entry schema. Keys absent from the schema are dropped.
 */
function sanitize_object(schema, object) {
    const result = {};

    for (const [key, entry_schema] of Object.entries(schema)) {
        error_path.push(key_error_path_segment(key));
        sanitize_object_entry(result, key, entry_schema, object);
        error_path.pop();
    }

    return result;
}

/*
 * Sanitize `object[key]` against `entry_schema` and store it in `result`.
 *
 * The entry schema may be wrapped as
 *     ["optional", schema..., ("default", default_value)?]
 * in which case a missing key is not an error and, if a default is given,
 * the default is stored instead.
 */
function sanitize_object_entry(result, key, entry_schema, object) {
    let optional = false;
    let has_default = false;
    let _default = undefined;

    if (Array.isArray(entry_schema) && entry_schema.length > 1 &&
        entry_schema[0] === "optional") {
        optional = true;
        entry_schema = entry_schema.slice(1);

        /* Index at which the "default" keyword has to reside, if present. */
        const idx_def = entry_schema.length - (entry_schema.length & 1) - 1;

        if (entry_schema[idx_def] === "default") {
            has_default = true;
            _default = entry_schema[idx_def + 1];
            entry_schema.splice(idx_def);
        } else if ((entry_schema.length & 1) !== 1) {
            throw schema_error("was not understood");
        }

        if (entry_schema.length < 2)
            entry_schema = entry_schema[0];
    }

    /* Only look at own properties; ignore e.g. inherited `toString`. */
    const unsanitized_value = has_own(object, key) ? object[key] : undefined;

    if (unsanitized_value === undefined) {
        if (!optional)
            throw error_message("is missing");

        if (has_default)
            set_own_entry(result, key, _default);

        return;
    }

    const sanitized = sanitize_unknown(entry_schema, unsanitized_value);
    if (sanitized !== discard)
        set_own_entry(result, key, sanitized);
}

/* Sanitizer for items that need no processing beyond the type check. */
function take_literal(schema, item) {
    return item;
}

/*
 * This function is used like a symbol. Other parts of the code do something
 * like `item === discard` to check whether an item was returned by this
 * function (and hence should be omitted from the result).
 */
function discard(schema, item) {
    return discard;
}

/*
 * The following are some helper functions to categorize various
 * schema item specifiers (used in the array below).
 */

function is_genobj_spec(item) {
    return Array.isArray(item) && item[1] === "matchentry";
}

function is_regex(item) {
    return typeof item === "object" && typeof item.test === "function";
}

function is_string_spec(item) {
    return typeof item === "string" && string_spec_regex.test(item);
}

/* Schema-side check; relies on array and null checks having gone first. */
function is_object(item) {
    return typeof item === "object";
}

/*
 * Item-side check. Unlike schemas, items are not pre-filtered by earlier
 * entries of `checks`, so null and arrays have to be excluded explicitly.
 */
function is_object_item(item) {
    return typeof item === "object" && item !== null && !Array.isArray(item);
}

function eq(what) {
    return i => i === what;
}

/*
 * Each entry: [schema predicate, item predicate, sanitizer, type name used in
 * error messages]. Array and null checks must go before object check.
 */
const checks = [
    [is_genobj_spec, is_object_item,              sanitize_genobj, "an object"],
    [Array.isArray,  Array.isArray,               sanitize_array,  "an array"],
    [eq(null),       i => i === null,             take_literal,    "null"],
    [is_regex,       i => typeof i === "string",  sanitize_regex,  "a string"],
    [is_string_spec, i => typeof i === "string",  sanitize_string, "a string"],
    [is_object,      is_object_item,              sanitize_object, "an object"],
    [eq("number"),   i => typeof i === "number",  take_literal,    "a number"],
    [eq("boolean"),  i => typeof i === "boolean", take_literal,    "a boolean"],
    [eq("anything"), i => true,                   take_literal,    "dummy"],
    [eq("discard"),  i => true,                   discard,         "dummy"]
];

/*
 * EXPORTS_START
 * EXPORT parse_json_with_schema
 * EXPORTS_END
 */