/**
 * This file is part of Haketilo.
 *
 * Function: Powerful, full-blown format enforcer for externally-obtained JSON.
 *
 * Copyright (C) 2021 Wojtek Kosior
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * As additional permission under GNU GPL version 3 section 7, you
 * may distribute forms of that code without the copy of the GNU
 * GPL normally required by section 4, provided you include this
 * license notice and, in case of non-source distribution, a URL
 * through which recipients can access the Corresponding Source.
 * If you modify file(s) with this exception, you may extend this
 * exception to your version of the file(s), but you are not
 * obligated to do so. If you do not wish to do so, delete this
 * exception statement from your version.
 *
 * As a special exception to the GPL, any HTML file which merely
 * makes function calls to this code, and for that purpose
 * includes it by reference shall be deemed a separate work for
 * copyright law purposes. If you modify this code, you may extend
 * this exception to your version of the code, but you are not
 * obligated to do so. If you do not wish to do so, delete this
 * exception statement from your version.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 *
 * I, Wojtek Kosior, thereby promise not to sue for violation of this file's
 * license. Although I request that you do not make use this code in a
 * proprietary program, I am not going to enforce this in court.
 */

/*
 * Module-level state shared by the sanitizer functions below. It is
 * (re)initialized at the start of every parse_json_with_schema() call.
 */

/*
 * Array of path segment strings (e.g. ".key" or "[0]") leading to the item
 * currently being sanitized. Segments get joined to form error messages.
 */
var error_path;
/*
 * Set to true right before throwing when it is the schema (and not the JSON
 * being checked) that could not be understood.
 */
var invalid_schema;

/*
 * Parse `json_string` and enforce `schema` on the parsed value.
 *
 * Returns the sanitized value. On any failure (JSON syntax error, data not
 * matching the schema, malformed schema) throws a *string* of the form
 * "Invalid JSON: <cause>." or "Invalid JSON schema: <cause>.".
 */
function parse_json_with_schema(schema, json_string)
{
    /* Reset the shared state used by the sanitizers. */
    invalid_schema = false;
    error_path = [];

    try {
	const parsed = JSON.parse(json_string);

	return sanitize_unknown(schema, parsed);
    } catch (e) {
	const suffix = invalid_schema ? " schema" : "";

	throw `Invalid JSON${suffix}: ${e}.`;
    } finally {
	/* Drop the reference so that the array can be garbage collected. */
	error_path = undefined;
    }
}

/*
 * Build an error message that names the currently processed item (using the
 * segments accumulated in `error_path`) followed by `cause`.
 */
function error_message(cause)
{
    const location = error_path.join("");

    return `object${location} ${cause}`;
}

/*
 * Sanitize `item` against `schema`, with support for alternatives.
 *
 * A schema of the form [opt1, "or", opt2, ..., "or", optN] is satisfied when
 * the item matches any of the options (tried in order). The last separator may
 * instead be "ordefault", in which case the element following it is not an
 * option but a value to be returned when no option matches. Any other schema
 * (including "matchentry" object specs) is handed over unchanged to
 * sanitize_unknown_no_alternatives().
 *
 * Returns the sanitized item (or the default). Throws a string describing the
 * problem when no option matches and there is no default, or when the schema
 * is malformed (`invalid_schema` gets set to true in the latter case).
 */
function sanitize_unknown(schema, item)
{
    let error_msg = undefined;
    let schema_options = [];
    let has_default = false;
    let _default = undefined;

    if (!Array.isArray(schema) || schema[1] === "matchentry" ||
	schema.length < 2 || !["ordefault", "or"].includes(schema[1]))
	return sanitize_unknown_no_alternatives(schema, item);

    /* Options and separators interleave, so the length must be odd. */
    if ((schema.length & 1) !== 1) {
	invalid_schema = true;
	throw error_message("was not understood");
    }

    for (let i = 0; i < schema.length; i++) {
	/* Even indices hold options, odd ones hold separators. */
	if ((i & 1) !== 1) {
	    schema_options.push(schema[i]);
	    continue;
	}

	if (schema[i] === "or")
	    continue;
	/* "ordefault" is only allowed as the very last separator. */
	if (schema[i] === "ordefault" && schema.length === i + 2) {
	    has_default = true;
	    _default = schema[i + 1];
	    break;
	}

	invalid_schema = true;
	throw error_message("was not understood");
    }

    /*
     * Nested sanitizers push segments onto `error_path` and do not pop them
     * when they throw. Remember the current depth so that a failed option
     * cannot leave stale segments behind for the next option (or for items
     * processed after this one).
     */
    const path_depth = error_path.length;

    for (const schema_option of schema_options) {
	try {
	    return sanitize_unknown_no_alternatives(schema_option, item);
	} catch (e) {
	    /* A broken schema is fatal - no point trying other options. */
	    if (invalid_schema)
		throw e;

	    error_path.length = path_depth;

	    if (has_default)
		continue;

	    if (error_msg === undefined)
		error_msg = e;
	    else
		error_msg = `${error_msg}, or ${e}`;
	}
    }

    if (has_default)
	return _default;

    throw error_msg;
}

/*
 * Sanitize `item` against a schema that is known not to be an "or"-style list
 * of alternatives. The first row of `checks` whose schema predicate accepts
 * `schema` decides which item predicate and sanitizer get used.
 *
 * Throws a string when the item is of the wrong type or when no row of
 * `checks` recognizes the schema (`invalid_schema` is set in the latter case).
 */
function sanitize_unknown_no_alternatives(schema, item)
{
    const matching_row = checks.find(([schema_check]) => schema_check(schema));

    if (matching_row === undefined) {
	invalid_schema = true;
	throw error_message("was not understood");
    }

    const [, item_check, sanitizer, type_name] = matching_row;

    if (!item_check(item))
	throw error_message(`should be ${type_name} but is not`);

    return sanitizer(schema, item);
}

/*
 * Turn an object key into a path segment for error messages: ".key" when the
 * key looks like an identifier, a bracketed JSON string literal otherwise.
 */
function key_error_path_segment(key)
{
    const identifier_like = /^[a-zA-Z_][a-zA-Z_0-9]*$/;

    if (identifier_like.test(key))
	return `.${key}`;

    return `[${JSON.stringify(key)}]`;
}

/*
 * Generic object - one that can contain arbitrary keys (in addition to ones
 * specified explicitly in the schema).
 *
 * The schema is an array of the form
 *     [explicit_keys_schema, "matchentry",
 *      ("minentries", min,)? ("maxentries", max,)?
 *      (key_regex, entry_schema)* | entry_schema]
 * where a lone trailing entry schema (without a regex) applies to every
 * non-empty key. Regexes may be given as RegExp objects or as strings.
 *
 * Keys handled by `explicit_keys_schema` are sanitized first; each remaining
 * key of `object` counts as a "matched entry" and gets sanitized with the
 * schema of the first regex it matches.
 *
 * Returns a new object. Throws a string on mismatch or on malformed schema
 * (`invalid_schema` is set in the latter case).
 */
function sanitize_genobj(schema, object)
{
    let max_matched_entries = Infinity;
    let min_matched_entries = 0;
    let matched_entries = 0;
    const entry_schemas = [];
    /* Work on a copy - directives get spliced out while being parsed. */
    schema = [...schema];

    if (schema[2] === "minentries") {
	if (schema.length < 4) {
	    invalid_schema = true;
	    throw error_message("was not understood");
	}

	min_matched_entries = schema[3];
	schema.splice(2, 2);
    }

    if (min_matched_entries < 0) {
	invalid_schema = true;
	throw error_message('specifies invalid "minentries" (should be a non-negative number)');
    }

    if (schema[2] === "maxentries") {
	if (schema.length < 4) {
	    invalid_schema = true;
	    throw error_message("was not understood");
	}

	max_matched_entries = schema[3];
	schema.splice(2, 2);
    }

    if (max_matched_entries < 0) {
	invalid_schema = true;
	throw error_message('specifies invalid "maxentries" (should be a non-negative number)');
    }

    /* Consume the remaining (regex, schema) pairs (or a lone schema). */
    while (schema.length > 2) {
	let regex = /.+/;

	if (schema.length > 3) {
	    regex = schema[2];
	    schema.splice(2, 1);
	}

	if (typeof regex === "string")
	    regex = new RegExp(regex);

	entry_schemas.push([regex, schema[2]]);
	schema.splice(2, 1);
    }

    const result = sanitize_object(schema[0], object);

    for (const [key, entry] of Object.entries(object)) {
	/*
	 * `result` receives keys taken from untrusted JSON, one of which could
	 * be named "hasOwnProperty" and shadow the inherited method. Hence the
	 * method is always taken from Object.prototype.
	 */
	if (Object.prototype.hasOwnProperty.call(result, key))
	    continue;

	matched_entries += 1;
	if (matched_entries > max_matched_entries)
	    throw error_message(`has more than ${max_matched_entries} matched entr${max_matched_entries === 1 ? "y" : "ies"}`);

	error_path.push(key_error_path_segment(key));

	let match = false;
	for (const [key_regex, entry_schema] of entry_schemas) {
	    if (!key_regex.exec(key))
		continue;

	    match = true;

	    sanitize_object_entry(result, key, entry_schema, object);
	    break;
	}

	if (!match) {
	    const regex_list = entry_schemas.map(i => i[0]).join(", ");
	    throw error_message(`does not match any of key regexes: [${regex_list}]`);
	}

	error_path.pop();
    }

    if (matched_entries < min_matched_entries)
	throw error_message(`has less than ${min_matched_entries} matched entr${min_matched_entries === 1 ? "y" : "ies"}`);

    return result;
}

/*
 * Sanitize `array` against an array schema of the form
 *     [item_schema..., (("repeat" | "repeatfull")(, group_length)?)?,
 *      ("minlen", min,)? ("maxlen", max)?]
 *
 * Without a repeat directive the array must have exactly as many items as
 * there are item schemas. With one, the last `group_length` (default 1) item
 * schemas form a group that is applied cyclically to all items that follow
 * the preceding (non-repeated) item schemas. "repeatfull" additionally
 * requires that the repeated part consists of whole groups only.
 *
 * Returns a new array; items whose sanitizer returned `discard` are omitted.
 * Throws a string on mismatch or on malformed schema (`invalid_schema` is set
 * in the latter case).
 */
function sanitize_array(schema, array)
{
    let min_length = 0;
    let max_length = Infinity;
    let repeat_length = 1;
    let repeat_directive = undefined;
    let repeat = undefined;
    let i = 0;
    const result = [];

    /* Work on a copy - directives get removed from the end while parsed. */
    schema = [...schema];
    if (schema[schema.length - 2] === "maxlen") {
	max_length = schema[schema.length - 1];
	schema.splice(schema.length - 2);
    }

    if (schema[schema.length - 2] === "minlen") {
	min_length = schema[schema.length - 1];
	schema.splice(schema.length - 2);
    }

    if (["repeat", "repeatfull"].includes(schema[schema.length - 2]))
	repeat_length = schema.pop();
    if (repeat_length < 1) {
	invalid_schema = true;
	/* The length has been popped, so the directive is the last element. */
	throw error_message(`specifies invalid "${schema[schema.length - 1]}" (should be a number not less than 1)`);
    }
    if (["repeat", "repeatfull"].includes(schema[schema.length - 1])) {
	repeat_directive = schema.pop();
	/* Cut the repeated group off; `schema` keeps the leading part. */
	repeat = schema.splice(schema.length - repeat_length);
    } else if (schema.length !== array.length) {
	throw error_message(`does not have exactly ${schema.length} items`);
    }

    if (repeat_directive === "repeatfull" &&
	(array.length - schema.length) % repeat_length !== 0)
	throw error_message(`does not contain a full number of item group repetitions`);

    if (array.length < min_length)
	throw error_message(`has less than ${min_length} element${min_length === 1 ? "" : "s"}`);

    if (array.length > max_length)
	throw error_message(`has more than ${max_length} element${max_length === 1 ? "" : "s"}`);

    for (const item of array) {
	/* Current schema list exhausted - (re)start the repeated group. */
	if (i >= schema.length) {
	    i = 0;
	    schema = repeat;
	}

	error_path.push(`[${i}]`);
	const sanitized = sanitize_unknown(schema[i], item);
	if (sanitized !== discard)
	    result.push(sanitized);
	error_path.pop();

	i++;
    }

    return result;
}

/*
 * Accept `string` only if the regular expression `schema` matches it.
 * Returns the string unchanged, throws an error message string otherwise.
 */
function sanitize_regex(schema, string)
{
    const matches = schema.test(string);

    if (!matches)
	throw error_message(`does not match regex ${schema}`);

    return string;
}

/*
 * String schema specifier: either a bare "string" or "string:<regex source>".
 * Capture group 2 holds the optional regex source.
 */
const string_spec_regex = /^string(:(.*))?$/;

/*
 * Sanitize `string` against a string specifier `schema`. Any string satisfies
 * a bare "string"; with "string:<regex source>" the string must additionally
 * match a regex built from that source.
 */
function sanitize_string(schema, string)
{
    const spec_match = string_spec_regex.exec(schema);
    const regex_source = spec_match[2];

    if (regex_source !== undefined)
	return sanitize_regex(new RegExp(regex_source), string);

    return string;
}

/*
 * Sanitize `object` against an object schema. Every key listed in `schema` is
 * processed with its entry schema; keys of `object` that the schema does not
 * list do not make it to the result.
 *
 * Returns a new object.
 */
function sanitize_object(schema, object)
{
    const sanitized = {};

    Object.entries(schema).forEach(([name, subschema]) => {
	error_path.push(key_error_path_segment(name));
	sanitize_object_entry(sanitized, name, subschema, object);
	error_path.pop();
    });

    return sanitized;
}

/*
 * Sanitize the entry `key` of `object` against `entry_schema` and store the
 * outcome under the same key in `result`.
 *
 * `entry_schema` may be wrapped as ["optional", schema...] or
 * ["optional", schema..., "default", value]. A missing optional entry is
 * skipped (or set to the default value, if given); a missing non-optional
 * entry causes an error message string to be thrown. Nothing gets stored
 * when the sanitizer returns `discard`.
 */
function sanitize_object_entry(result, key, entry_schema, object)
{
    let optional = false;
    let has_default = false;
    let _default = undefined;

    /*
     * Keys come from untrusted JSON. A plain `result[key] = value` with key
     * "__proto__" would replace the prototype of `result` instead of creating
     * an entry, making unsanitized values visible as inherited properties.
     */
    const store = value => Object.defineProperty(result, key, {
	value,
	writable: true,
	enumerable: true,
	configurable: true
    });

    if (Array.isArray(entry_schema) && entry_schema.length > 1) {
	if (entry_schema[0] === "optional") {
	    optional = true;
	    /* Copy of the schema with the leading "optional" dropped. */
	    entry_schema = [...entry_schema].splice(1);

	    /* Index of the last odd position - where "default" may be. */
	    const idx_def = entry_schema.length - (entry_schema.length & 1) - 1;
	    if (entry_schema[idx_def] === "default") {
		has_default = true;
		_default = entry_schema[idx_def + 1];
		entry_schema.splice(idx_def);
	    } else if ((entry_schema.length & 1) !== 1) {
		invalid_schema = true;
		throw error_message("was not understood");
	    }

	    /* Unwrap a single remaining schema. */
	    if (entry_schema.length < 2)
		entry_schema = entry_schema[0];
	}
    }

    /*
     * Only own properties count as present - inherited ones (like "toString"
     * or "constructor") are not part of the data.
     */
    const is_own = Object.prototype.hasOwnProperty.call(object, key);
    let unsanitized_value = is_own ? object[key] : undefined;
    if (unsanitized_value === undefined) {
	if (!optional)
	    throw error_message("is missing");

	if (has_default)
	    store(_default);

	return;
    }

    const sanitized = sanitize_unknown(entry_schema, unsanitized_value);
    if (sanitized !== discard)
	store(sanitized);
}

/*
 * Sanitizer that accepts the item as it is. The `schema` argument is unused
 * and only present so that the signature matches the other sanitizers.
 */
function take_literal(schema, item)
{
    return item;
}

/*
 * This function is used like a symbol. Other parts of the code do something
 * like `item === discard` to check if an item was returned by this function.
 * It returns itself regardless of the arguments, so when it is used as a
 * sanitizer its result marks the item as one to be left out of the output.
 */
function discard(schema, item)
{
    return discard;
}

/*
 * The following are some helper functions to categorize various
 * schema item specifiers (used in the array below).
 */

/*
 * Tell whether `item` is a generic object specifier, i.e. an array with
 * "matchentry" as its second element.
 */
function is_genobj_spec(item)
{
    if (!Array.isArray(item))
	return false;

    return item[1] === "matchentry";
}

/*
 * Tell whether `item` can be used as a regular expression, i.e. is an object
 * with a callable `test` property. `null` is explicitly rejected because
 * `typeof null` is also "object" and accessing its property would throw.
 */
function is_regex(item)
{
    return item !== null && typeof item === "object" &&
	typeof item.test === "function";
}

/*
 * Tell whether `item` is a string specifier - the string "string", optionally
 * followed by a colon and a regex source.
 */
function is_string_spec(item)
{
    if (typeof item !== "string")
	return false;

    return string_spec_regex.test(item);
}

/*
 * Tell whether `item` is a proper (non-null, non-array) object.
 *
 * A bare `typeof item === "object"` also holds for `null` and for arrays.
 * This function is used to check externally-obtained items as well, and
 * letting those through would make object sanitizers fail with a TypeError
 * on `null` or silently convert arrays to objects.
 */
function is_object(item)
{
    return item !== null && typeof item === "object" && !Array.isArray(item);
}

/*
 * Make a predicate that tells whether its argument is strictly equal to
 * `what`.
 */
function eq(what)
{
    return function (candidate) {
	return candidate === what;
    };
}

/*
 * Table that maps schema kinds to sanitizers. Each row holds:
 *   - a predicate telling whether a schema is of the given kind,
 *   - a predicate telling whether an item has the type this kind requires,
 *   - the sanitizer function to call with (schema, item),
 *   - the type's name for use in "should be ... but is not" error messages.
 * Rows are tried in order and the first one with a matching schema predicate
 * is used.
 *
 * Array and null checks must go before object check.
 */
const checks = [
    [is_genobj_spec, is_object,                   sanitize_genobj, "an object"],
    [Array.isArray,  Array.isArray,               sanitize_array,  "an array"],
    [eq(null),       i => i === null,             take_literal,    "null"],
    [is_regex,       i => typeof i === "string",  sanitize_regex,  "a string"],
    [is_string_spec, i => typeof i === "string",  sanitize_string, "a string"],
    [is_object,      is_object,                   sanitize_object, "an object"],
    [eq("number"),   i => typeof i === "number",  take_literal,    "a number"],
    [eq("boolean"),  i => typeof i === "boolean", take_literal,    "a boolean"],
    [eq("anything"), i => true,                   take_literal,    "dummy"],
    [eq("discard"),  i => true,                   discard,         "dummy"]
];

/*
 * EXPORTS_START
 * EXPORT parse_json_with_schema
 * EXPORTS_END
 */