#!/usr/bin/awk -f
#
# SPDX-License-Identifier: CC0-1.0
#
# Process javascript files and resolve dependencies between them
#
# This file is part of Haketilo
#
# Copyright (C) 2021, Wojtek Kosior
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the CC0 1.0 Universal License as published by
# the Creative Commons Corporation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# CC0 1.0 Universal License for more details.

# Symbolic names for the two boolean values used throughout this script.
BEGIN {
    false = 0
    true  = 1
}

# Regular expressions describing file paths and the arguments accepted by each
# '#' directive.  directive_args_patterns[] is indexed by directive name (the
# text directly after '#') and holds an anchored regex that the trimmed
# argument string must match.
BEGIN {
    identifier_re    = "[_a-zA-Z][_a-zA-Z0-9]*"
    path_dir_re      = "([-_a-zA-Z0-9][-._a-zA-Z0-9]*/)*"
    path_ext_re      = "(\\.[-_.a-zA-Z0-9]*)?"
    path_re_noanchor = path_dir_re identifier_re path_ext_re
    path_re          = "^" path_re_noanchor "$"

    # A condition is one optionally negated identifier followed by either a
    # chain of '&&' clauses or a chain of '||' clauses (mixing is rejected).
    if_clause_re = "!?" identifier_re
    if_AND_re    = "([[:space:]]+&&[[:space:]]+" if_clause_re ")*"
    if_OR_re     = "([[:space:]]+[|][|][[:space:]]+" if_clause_re ")*"
    if_args_re   = "^" if_clause_re "(" if_AND_re "|" if_OR_re ")$"

    directive_args_patterns["IF"]      = if_args_re
    directive_args_patterns["ENDIF"]   = "^$"
    directive_args_patterns["ELSE"]    = "^$"
    # #ELIF conditions are evaluated by the same code as #IF conditions, so
    # they must follow the same grammar.  The former 'NOT identifier' form was
    # accepted here but evaluated as "is NOT defined?", silently ignoring the
    # identifier; it is now reported as a syntax error instead.
    directive_args_patterns["ELIF"]    = if_args_re
    directive_args_patterns["DEFINE"]  = "^" identifier_re "$"
    directive_args_patterns["UNDEF"]   = "^" identifier_re "$"
    directive_args_patterns["ERROR"]   = "^.*$"
    # The directive is spelled COPY_FILE.  Without an entry under that name
    # the lookup yields an empty regex, which matches any argument string.
    # The "COPY" key is kept only in case something else refers to it.
    directive_args_patterns["COPY"]      = "^[^[:space:]]+$"
    directive_args_patterns["COPY_FILE"] = "^[^[:space:]]+$"
    directive_args_patterns["INCLUDE"] = "^[^[:space:]]+$"
    directive_args_patterns["INCLUDE_VERBATIM"] = "^[^[:space:]]+$"

    AS_re                = "AS[[:space:]]+" identifier_re
    maybe_AS_re          = "([[:space:]]+" AS_re ")?"
    FROM_clause_re       = identifier_re maybe_AS_re
    more_FROM_clauses_re = "([[:space:]]*,[[:space:]]*" FROM_clause_re ")*"
    FROM_IMPORT_re       = "[^[:space:]]+[[:space:]]+IMPORT[[:space:]]+"
    EXPORT_AS_re         = ".*[^[:space:]][[:space:]]+" AS_re

    directive_args_patterns["IMPORT"] = "^[^[:space:]]+" maybe_AS_re "$"
    directive_args_patterns["FROM"]   = \
        "^" FROM_IMPORT_re FROM_clause_re more_FROM_clauses_re "$"
    directive_args_patterns["EXPORT"] = "^(" EXPORT_AS_re "|" identifier_re ")$"

    directive_args_patterns["LOADJS"] = "^[^[:space:]]+$"
    directive_args_patterns["LOADCSS"] = "^[^[:space:]]+$"

    directive_args_patterns["LOADHTML"] = "^[^[:space:]]+$"
}

# Check `path' against path_re.  Returns 0 when it matches; otherwise prints
# an error mentioning the file being read (`read_path') and the offending
# `line' to stderr and returns 1.
function validate_path(read_path, path, line) {
    if (path ~ path_re)
        return 0

    printf "ERROR: File path in %s does not match '%s': %s\n",
        read_path, path_re, line > "/dev/stderr"

    return 1
}

# Derive an identifier from a file path by stripping the leading directory
# components (path_dir_re) and then the trailing extension (path_ext_re).
function identifier_from_path(path,    dir_prefix_re, ext_suffix_re) {
    dir_prefix_re = "^" path_dir_re
    ext_suffix_re = path_ext_re "$"

    # The order matters: directories are removed before the extension.
    sub(dir_prefix_re, "", path)
    sub(ext_suffix_re, "", path)

    return path
}

# Return what follows the last whitespace character of `line' (the whole
# string when it contains no whitespace, "" when it ends with whitespace).
function last_token(line) {
    # The greedy '.*' makes the match extend to the final whitespace character.
    sub(/^.*[[:space:]]/, "", line)
    return line
}

# Return what precedes the first whitespace character of `line' (the whole
# string when it contains no whitespace, "" when it starts with whitespace).
function first_token(line) {
    # Cut from the first whitespace character through the end of the string.
    sub(/[[:space:]].*$/, "", line)
    return line
}

# Return true when `array' has no elements and false otherwise.
function is_empty(array,    key, empty) {
    empty = true

    for (key in array) {
        # Seeing a single key is enough; no need to visit the rest.
        empty = false
        break
    }

    return empty
}

# Remove every element from `array'.
function clear_array(array,    key) {
    # Splitting an empty string deletes all existing elements and stores none;
    # `key' is kept in the signature only so the interface is unchanged.
    split("", array)
}

# Append `line' to the output recorded for `path'.
#
# `where' restricts the destination:
#   "amalgamation_root_file" - the line is not stored in lines[],
#   "non_amalgamation_file"  - the line is not stored in main_js_lines[],
#   anything else (or omitted) - no restriction.
# main_js_lines[] only ever receives lines of the file named by
# js_to_amalgamate.
function add_line(path, line, where,    nr) {
    if (where != "amalgamation_root_file") {
        nr = ++lines_count[path]
        lines[path,nr] = line
    }

    if (where == "non_amalgamation_file")
        return

    if (path == js_to_amalgamate) {
        nr = ++main_js_lines_count
        main_js_lines[nr] = line
    }
}

# State of the HTML page currently being processed: the scripts already
# loaded into it and how many of them there are.
BEGIN {
    page_js_deps_count = 0
    # Deleting a nonexistent element makes awk treat the variable as an array.
    delete page_js_deps[0]
}

# Preprocess the file `read_path' on behalf of the output file `path'.
#
# path      - file whose output lines are being collected (see add_line())
# read_path - file actually being read; it differs from `path' only while a
#             file pulled in through #INCLUDE is being processed
# mode      - "js", "html" or "manifest"; selects which directives are allowed
#             in addition to the common ones
#
# Lines not starting with '#' are copied to the output unless they are inside
# a conditional branch that is not taken.  Lines starting with '#' are
# directives; a trailing backslash joins the directive with the next line.
# Once the file itself is exhausted, lines registered for `path' in
# appended_lines[] are consumed as if they were part of it.
#
# Returns 0 on success (also when `path' was already processed earlier) and 1
# after printing a message to stderr on failure.
function process_file(path, read_path, mode,
                      line, result, line_part, directive, directive_args,
                      if_nesting, if_nesting_true, if_branch_processed) {
    # Conditional state:
    #   if_nesting          - number of currently open #IF blocks,
    #   if_nesting_true     - how many of them, counting from the outermost,
    #                         are in a branch that is being emitted,
    #   if_branch_processed - whether a branch of the innermost not-taken
    #                         conditional has already been emitted.
    if_nesting = 0
    if_nesting_true = 0
    if_branch_processed = false

    if (path in modes && modes[path] != mode) {
        printf "ERROR: File %s used multiple times in different contexts\n",
            path > "/dev/stderr"
        return 1
    }

    # A new top-level HTML page starts with no scripts or stylesheets loaded.
    if (mode == "html" && path == read_path) {
        clear_array(page_js_deps)
        page_js_deps_count = 0
        clear_array(page_css_deps)
    }

    modes[path] = mode

    if (!(path in reading)) {
        # Not currently being read but already has output: processed before.
        if (path in lines_count)
            return 0
        lines_count[path]
    }

    reading[read_path]

    if (mode == "js" && path == read_path) {
        add_line(path, "\"use strict\";")
        add_line(path, "this.haketilo_exports = this.haketilo_exports || {};")
        add_line(path, "this.haketilo_exports[\"" path "\"] = {};")

        add_line(path, "window.globalThis = this", "amalgamation_root_file")

        add_line(path, "")

        add_line(path, "(function() {", "non_amalgamation_file")
        add_line(path, "var globalThis = this.haketilo_this",
                 "non_amalgamation_file")
        add_line(path, "{", "non_amalgamation_file")
    }

    while (true) {
        result = (getline line < read_path)
        if (result < 0) {
            printf "ERROR: Could not read %s\n", read_path > "/dev/stderr"
            return 1
        }
        if (result == 0) {
            # End of file: continue with lines appended through '-A', if any
            # are left.
            if (!(path in appended_lines_counts) ||
                additional_line_nr[path] == appended_lines_counts[path])
                break

            line = appended_lines[path,++additional_line_nr[path]]
        }

        if (line !~ /^#/) {
            if (if_nesting_true == if_nesting)
                add_line(path, line)
            continue
        }

        # Join directive lines that end with a backslash.
        while (line ~ /\\$/) {
            sub(/\\$/, "", line)

            result = (getline line_part < read_path)
            if (result < 0) {
                printf "ERROR: Could not read %s\n", read_path > "/dev/stderr"
                return 1
            }
            if (result == 0) {
                if (path in appended_lines_counts &&
                    additional_line_nr[path] < appended_lines_counts[path]) {
                    line_part = appended_lines[path,++additional_line_nr[path]]
                } else {
                    printf "ERROR: Unexpected EOF in %s\n",
                        read_path > "/dev/stderr"
                    return 1
                }
            }

            line = line " " line_part
        }

        # The directive name is everything between '#' and the first
        # whitespace character.
        directive = substr(line, 2)
        sub(/[[:space:]].*$/, "", directive)

        if (directive !~ \
            /^(IF|ENDIF|ELSE|ELIF|DEFINE|UNDEF|ERROR|INCLUDE|INCLUDE_VERBATIM|COPY_FILE)$/ &&
            (mode != "js" || directive !~ /^(IMPORT|FROM|EXPORT)$/) &&
            (mode != "html" || directive !~ /^(LOADJS|LOADCSS)$/) &&
            (mode != "manifest" || directive !~ /^(LOADJS|LOADHTML)$/)) {
            printf "ERROR: Invalid # directive in %s: %s\n",
                read_path, line > "/dev/stderr"
            return 1
        }

        # Arguments: the rest of the line with surrounding whitespace removed.
        directive_args = line
        sub(/^#[^[:space:]]*[[:space:]]*/, "", directive_args)
        sub(/[[:space:]]*$/, "", directive_args)

        if (directive_args !~ directive_args_patterns[directive]) {
            printf "ERROR: #%s arguments in %s do not match '%s': %s\n",
                directive, read_path, directive_args_patterns[directive], line \
                > "/dev/stderr"
            return 1
        }

        if (directive == "IF") {
            # The condition is only evaluated when the enclosing code is
            # itself being emitted.
            if (if_nesting_true == if_nesting) {
                if (if_condition_true(directive_args, path))
                    if_nesting_true++
                else
                    if_branch_processed = false
            }

            if_nesting++
        } else if (directive == "ENDIF") {
            if (if_nesting == 0) {
                printf "ERROR: Spurious #ENDIF in %s\n",
                    read_path > "/dev/stderr"
                return 1
            }

            if (if_nesting_true == if_nesting)
                if_nesting_true--

            if_nesting--
        } else if (directive == "ELSE") {
            if (if_nesting == 0) {
                printf "ERROR: Spurious #ELSE in %s\n",
                    read_path > "/dev/stderr"
                return 1
            }

            if (if_nesting == if_nesting_true + 1 && !if_branch_processed) {
                # No earlier branch of this conditional was taken.
                if_nesting_true++
            } else if (if_nesting == if_nesting_true) {
                # Leaving the branch that was taken.
                if_branch_processed = true
                if_nesting_true--
            }
        } else if (directive == "ELIF") {
            if (if_nesting == 0) {
                printf "ERROR: Spurious #ELIF in %s\n",
                    read_path > "/dev/stderr"
                return 1
            }

            if (if_nesting == if_nesting_true + 1 && !if_branch_processed &&
                if_condition_true(directive_args, path)) {
                if_nesting_true++
            } else if (if_nesting == if_nesting_true) {
                if_branch_processed = true
                if_nesting_true--
            }
        } else if (if_nesting_true != if_nesting) {
            # Every other directive is ignored inside a branch not taken.
            continue
        } else if (directive == "DEFINE") {
            defines[path,directive_args]
        } else if (directive == "UNDEF") {
            delete defines[path,directive_args]
        } else if (directive == "ERROR") {
            printf "ERROR: File %s says: %s\n",
                read_path, directive_args > "/dev/stderr"
            return 1
        } else if (directive == "INCLUDE") {
            if (include_file(path, read_path, directive_args, line))
                return 1
        } else if (directive == "INCLUDE_VERBATIM") {
            if (include_file(path, read_path, directive_args, line, true))
                return 1
        } else if (directive == "COPY_FILE") {
            if (mark_copy_file(path, read_path, directive_args, line))
                return 1
        } else if (directive == "IMPORT") {
            if (import_js_file(path, read_path, directive_args, line))
                return 1
        } else if (directive == "FROM") {
            if (import_from_js_file(path, read_path, directive_args, line))
                return 1
        } else if (directive == "EXPORT") {
            if (export_from_js_file(path, read_path, directive_args, line))
                return 1
        } else if (directive == "LOADJS") {
            if (mode == "html") {
                page_js_deps_count = \
                    load_js_file(path, read_path, directive_args, line,
                                 page_js_deps, page_js_deps_count)
                if (page_js_deps_count < 1)
                    return 1
            } else if (mode == "manifest") {
                if (load_js_file(path, read_path, directive_args, line) < 1)
                    return 1
            }
        } else if (directive == "LOADCSS") {
            if (load_css_file(path, read_path, directive_args, line,
                              page_css_deps))
                return 1
        } else if (directive == "LOADHTML") {
            if (load_html_file(path, read_path, directive_args, line))
                return 1
        }
    }

    close(read_path)

    if (if_nesting) {
        printf "ERROR: Unterminated #IF in %s\n", read_path > "/dev/stderr"
        return 1
    }

    if (mode == "js" && path == read_path) {
        add_line(path, "}", "non_amalgamation_file")
        add_line(path, "}).call({", "non_amalgamation_file")
        add_line(path, "    haketilo_exports: this.haketilo_exports,",
                 "non_amalgamation_file")
        add_line(path, "    haketilo_this:    this",
                 "non_amalgamation_file")
        add_line(path, "});", "non_amalgamation_file")
    }

    delete reading[read_path]

    # Callers compare the result against 0, so return it explicitly rather
    # than relying on the unspecified value of a function without `return'.
    return 0
}

# Evaluate the condition of an #IF / #ELIF directive.
#
# `directive_args' is a list of optionally '!'-negated identifiers separated
# by operators.  An identifier counts as defined when it is in defines[]
# either globally or for `path'.  As soon as a '||' operator has been seen,
# all following clauses are OR-ed into the result; before that they are
# AND-ed.
function if_condition_true(directive_args, path,
                           result, bool, first_iter, word, negated, alt) {
    for (first_iter = true; directive_args; first_iter = false) {
        # Take the next identifier off the front of the argument string.
        word = first_token(directive_args)
        sub(/^[^[:space:]]+[[:space:]]*/, "", directive_args)

        # Look at the operator that follows it (if any), then drop it as well.
        if (!alt)
            alt = (directive_args ~ /^[|][|]/)
        sub(/^[^[:space:]]+[[:space:]]*/, "", directive_args)

        # sub() returns 1 exactly when a leading '!' was present and removed.
        negated = sub(/^!/, "", word)
        bool = ((word in defines) || ((path,word) in defines)) != negated

        if (first_iter)
            result = bool
        else if (alt)
            result = (result || bool)
        else
            result = (result && bool)
    }

    return result
}

# Handle #INCLUDE and #INCLUDE_VERBATIM found in `read_path'.
#
# With `verbatim' set, the lines of `included_path' are appended unchanged to
# the output of `root_path'.  Otherwise the included file is preprocessed in
# the mode of `root_path', with its output going to `root_path'.
# Returns 0 on success and 1 (after printing to stderr) on failure.
function include_file(root_path, read_path, included_path, line, verbatim,
                      read_line, result) {
    if (included_path in reading) {
        printf "ERROR: Inclusion loop when including %s in %s\n",
            included_path, read_path > "/dev/stderr"
        return 1
    }

    if (!verbatim) {
        if (process_file(root_path, included_path, modes[root_path]) == 0)
            return 0
    } else {
        while ((result = (getline read_line < included_path)) > 0)
            add_line(root_path, read_line)

        # 0 means a clean end of file, a negative value a read error.
        if (result == 0) {
            close(included_path)
            return 0
        }

        printf "ERROR: Could not read %s\n", included_path > "/dev/stderr"
    }

    # Both failure paths end with the same context line.
    printf "       when including %s in %s\n",
        included_path, read_path > "/dev/stderr"

    return 1
}

# Handle #COPY_FILE: after validating `copied_path', record it in to_copy[].
# Returns 0 on success and 1 when the path is invalid.
function mark_copy_file(root_path, read_path, copied_path, line) {
    if (validate_path(read_path, copied_path, line) == 0) {
        to_copy[copied_path]
        return 0
    }

    return 1
}

# Make one imported value available in `root_path' under the name `as'.
#
# When `what' is given, the single export `what' of `imported_path' is bound;
# otherwise the whole exports object of `imported_path' is.  The import is
# recorded in imports_as[], imports_from[] and imports_what[], and the
# imported file is processed as a "js" file.
# Returns 0 on success and 1 (after printing to stderr) on failure.
function satisfy_import(root_path, imported_path, as, what,
                        added_line, description, count) {
    if ((root_path,as) in imports_from) {
        printf "ERROR: Multiple items imported under the name '%s' in %s\n",
            as, root_path > "/dev/stderr"
        return 1
    }

    added_line = " " as " = haketilo_exports[\"" imported_path "\"]" \
        (what ? "." what : "")

    # A standalone file gets a `const' binding, the amalgamation root a `let'.
    add_line(root_path, "const" added_line ";", "non_amalgamation_file")
    add_line(root_path, "let"   added_line ";", "amalgamation_root_file")

    count = ++import_counts[root_path]

    imports_as[root_path,count] = as
    imports_from[root_path,as] = imported_path
    imports_what[root_path,as] = what

    description = (what ? "'" what "' from " imported_path : imported_path) \
        " needed by " root_path

    if (imported_path in reading) {
        printf "ERROR: dependency loop when importing %s\n",
            description > "/dev/stderr"
        return 1
    }

    if (process_file(imported_path, imported_path, "js")) {
        printf "       when importing %s\n", description > "/dev/stderr"
        return 1
    }

    # The export can only be checked once the imported file was processed.
    if (what && !((imported_path,what) in exports)) {
        printf "ERROR: %s doesn't export '%s' needed by %s\n",
            imported_path, what, root_path > "/dev/stderr"
        return 1
    }

    return 0
}

# Handle '#IMPORT path [AS name]'.
#
# The whole exports object of the imported file is made available under
# `name' or, when no AS clause is present, under the identifier derived from
# the file name.  Returns 0 on success and 1 on failure.
function import_js_file(root_path, read_path, directive_args, line,
                        imported_path, as) {
    imported_path = first_token(directive_args)
    if (validate_path(read_path, imported_path, line))
        return 1

    # Look for the AS clause in the trimmed arguments rather than in the raw
    # line: trailing whitespace on the line would otherwise hide the clause
    # and the requested name would be silently ignored.
    if (directive_args ~ ("[[:space:]]" AS_re "$"))
        as = last_token(directive_args)
    else
        as = identifier_from_path(imported_path)

    return satisfy_import(root_path, imported_path, as)
}

# Handle '#FROM path IMPORT item [AS name], ...'.
#
# Each comma-separated clause imports one exported item of `path', under
# `name' when given and under the item's own name otherwise.
# Returns 0 on success and 1 on failure.
function import_from_js_file(root_path, read_path, directive_args, line,
                             imported_path, args_copy, FROM_clause, as) {
    imported_path = first_token(directive_args)
    if (validate_path(read_path, imported_path, line))
        return 1

    # Keep only the clause list and give it a leading comma so that every
    # clause, including the first one, is preceded by a separator.
    args_copy = directive_args
    sub("^" FROM_IMPORT_re, "", args_copy)
    args_copy = "," args_copy

    do {
        # Drop the separator in front of the next clause ...
        sub(/^[^,]*,[[:space:]]*/, "", args_copy)

        # ... and isolate that clause from whatever follows it.
        FROM_clause = args_copy
        sub(/[[:space:]]*,.*$/, "", FROM_clause)

        # In a clause without AS the first and the last token coincide.
        if (satisfy_import(root_path, imported_path,
                           last_token(FROM_clause), first_token(FROM_clause)))
            return 1
    } while (args_copy ~ /,/)

    return 0
}

# Handle '#EXPORT identifier' and '#EXPORT expression AS name'.
#
# Emits an assignment storing the exported value in the exports object of
# `root_path' and records the exported name in exports[].
# Returns 0 on success and 1 when the name has already been exported.
function export_from_js_file(root_path, read_path, directive_args, line,
                             as, exported_item, added_line) {
    as = last_token(directive_args)

    # A lone identifier is exported under its own name; otherwise the value
    # is everything in front of the trailing AS clause.
    exported_item = directive_args
    if (directive_args !~ ("^" identifier_re "$"))
        sub("[[:space:]]+" AS_re "$", "", exported_item)

    if ((root_path,as) in exports) {
        printf "ERROR: Multiple values exported under the name '%s' in %s\n",
            as, root_path > "/dev/stderr"
        return 1
    }

    added_line = "this.haketilo_exports[\"" root_path "\"]." as
    added_line = added_line " = (" exported_item ");"
    add_line(root_path, added_line)

    exports[root_path,as]

    return 0
}

# Append `js_path' and, before it, all files it transitively imports to
# dependencies[], in an order where every file comes after its imports.
#
# dependencies       - array indexed 1..count that receives the paths
# count              - number of entries already present in dependencies[]
# dependencies_added - set of paths already present in dependencies[]
#
# Returns the new number of entries, or 0 when a file could not be processed.
function compute_deps(js_path, dependencies, count, dependencies_added,
                      i_max, i, as, next_path) {
    # Make sure awk treats the (possibly omitted) argument as an array.
    delete dependencies_added[0]

    if (process_file(js_path, js_path, "js"))
        return 0

    # The requested file may itself already be listed, for instance when a
    # page loads a script that an earlier script of the same page imported.
    # Listing it again would load it twice.
    if (js_path in dependencies_added)
        return count

    i_max = import_counts[js_path]
    for (i = 1; i <= i_max; i++) {
        as = imports_as[js_path,i]
        next_path = imports_from[js_path,as]
        if (next_path in dependencies_added)
            continue

        count = compute_deps(next_path, dependencies, count, dependencies_added)
        if (count < 1)
            return 0
    }

    dependencies_added[js_path]
    dependencies[++count] = js_path

    return count
}

# Here js_deps and js_deps_count are optional args, used when loading scripts
# into an HTML page to avoid having the same script loaded twice in multiple
# places.
# Handle #LOADJS: emit references to `loaded_path' and to every script it
# depends on that is not yet listed in js_deps[].
#
# In "html" mode each script becomes a <script> tag; otherwise it becomes a
# quoted string, followed by a comma unless it is the last one.
# Returns the updated number of entries in js_deps[], or 0 on failure.
function load_js_file(root_path, read_path, loaded_path, line,
                      js_deps, js_deps_count,
                      js_deps_already_added, i, added_line) {
    # Make sure awk treats both (possibly omitted) variables as arrays.
    delete js_deps[""]
    delete js_deps_already_added[0]

    if (validate_path(read_path, loaded_path, line))
        return 0

    i = 0
    while (++i <= js_deps_count)
        js_deps_already_added[js_deps[i]]

    # Remember where the entries added by this call are going to start.
    i = js_deps_count

    js_deps_count = compute_deps(loaded_path, js_deps,
                                 js_deps_count, js_deps_already_added)

    if (js_deps_count < 1) {
        printf "       when loading %s from %s\n",
            loaded_path, read_path > "/dev/stderr"
        return 0
    }

    for (i++; i <= js_deps_count; i++) {
        if (modes[root_path] == "html")
            added_line = "<script src=\"/" js_deps[i] "\"></script>"
        else if (i != js_deps_count)
            added_line = "\"" js_deps[i] "\"" ","
        else
            added_line = "\"" js_deps[i] "\""

        add_line(root_path, added_line)
    }

    return js_deps_count
}

# css_deps is an array used to avoid having the same stylesheet loaded twice in
# multiple places in a single HTML page.
# Handle #LOADCSS: unless `loaded_path' is already present in css_deps[],
# record it there, mark it for copying and emit a <link> tag for it.
# Returns 0 on success and 1 when the path is invalid.
function load_css_file(root_path, read_path, loaded_path, line, css_deps,
                       added_line) {
    # `added_line' is declared as a local above so that it no longer leaks
    # into the global namespace.

    # Make sure awk treats the argument as an array.
    delete css_deps[""]

    if (validate_path(read_path, loaded_path, line))
        return 1

    if (!(loaded_path in css_deps)) {
        css_deps[loaded_path]
        to_copy[loaded_path]
        added_line = "<link rel=\"stylesheet\" type=\"text/css\" " \
            "href=\"/" loaded_path "\" />"
        add_line(root_path, added_line)
    }

    return 0
}

# Handle #LOADHTML: validate `loaded_path' and process it as an HTML page.
# Returns 0 on success and 1 (after printing to stderr) on failure.
function load_html_file(root_path, read_path, loaded_path, line) {
    if (validate_path(read_path, loaded_path, line))
        return 1

    if (process_file(loaded_path, loaded_path, "html")) {
        # The format has two conversions, so exactly two arguments are given.
        printf "       when loading %s from %s\n",
            loaded_path, read_path > "/dev/stderr"
        return 1
    }

    return 0
}

# Print to stdout the processed lines of every dependency of the file named
# by js_to_amalgamate, followed by the lines collected in main_js_lines[].
# Returns 0 on success and 1 when the dependencies could not be computed.
function print_amalgamation(js_deps, js_deps_count,
                            js_dep_nr, path, max_line_nr, line_nr) {
    # Make sure awk treats the variable as an array.
    delete js_deps[0]

    js_deps_count = compute_deps(js_to_amalgamate, js_deps, 0)
    if (js_deps_count < 1)
        return 1

    # The last dependency is the main file itself; it is printed separately
    # below, so this loop stops one entry short.
    js_dep_nr = 0
    while (++js_dep_nr < js_deps_count) {
        path = js_deps[js_dep_nr]
        max_line_nr = lines_count[path]

        line_nr = 0
        while (++line_nr <= max_line_nr)
            print lines[path, line_nr]
    }

    line_nr = 0
    while (++line_nr <= main_js_lines_count)
        print main_js_lines[line_nr]

    return 0
}

# Print a one-line summary of the accepted command-line options to stderr.
function print_usage(    usage_format) {
    # The single %s is filled with ARGV[0].
    usage_format = "USAGE: %s compute_scripts.awk -- [-D PREPROCESSOR_DEFINITION]... [-M manifest/to/process/manifest.json]... [-H html/to/process.html]... [-J js/to/process.js]... [-A file/to/append/to.js:appended_code]... [--help|-h] [--output-dir=./build] [--write-js-deps] [--write-html-deps] [--output=files-to-copy|--output=amalgamate-js:js/to/process.js]\n"

    printf usage_format, ARGV[0] > "/dev/stderr"
}

# Regexes that the argument following each single-letter option must match.
BEGIN {
    # -A takes 'path:code'; only the part up to the colon is checked.
    option_arg_patterns["A"] = "^" path_re_noanchor ":"
    option_arg_patterns["D"] = "^" identifier_re "$"

    # The remaining options take a plain file path.
    option_arg_patterns["H"] = path_re
    option_arg_patterns["J"] = path_re
    option_arg_patterns["M"] = path_re
}

# Program entry point: parse the command line, process the requested files,
# write the results below output_dir and print the output selected with
# '--output='.
#
# Returns the exit status: 0 on success (also after printing the usage for
# --help / -h) and 1 on any error.
function main(i, j, path, letter, dir, max_line_nr, js_deps, js_deps_count,
              code, tmp_lines, out_file) {
    output_dir = "./build"
    write_js_deps = false
    write_html_deps = false

    # Deleting a nonexistent element makes awk treat a variable as an array.
    delete appended_lines[0]
    delete appended_lines_counts[0]
    delete tmp_lines[0]

    delete lines[0]
    delete lines_count[0]

    output = ""
    js_to_amalgamate = ""
    delete main_js_lines[0]

    delete manifests_to_process[0]
    delete html_to_process[0]
    delete js_to_process[0]

    delete explicitly_requested[0]

    for (i = 1; i < ARGC; i++) {
        if (ARGV[i] ~ /^-[DMHJA]$/) {
            # Single-letter option; its value is the next argument.
            letter = substr(ARGV[i++], 2)
            if (i == ARGC || ARGV[i] !~ option_arg_patterns[letter]) {
                printf "ERROR: '-%s' option should be followed by an argument matching '%s'\n",
                    letter, option_arg_patterns[letter] > "/dev/stderr"
                return 1
            }

            if (letter == "D")
                defines[ARGV[i]]
            else
                explicitly_requested[ARGV[i]]

            if (letter == "M")
                manifests_to_process[ARGV[i]]
            if (letter == "H")
                html_to_process[ARGV[i]]
            if (letter == "J")
                js_to_process[ARGV[i]]

            if (letter == "A") {
                # The value has the form 'path:code'; the lines of `code' are
                # later consumed as if they were appended to `path'.
                path = ARGV[i]
                sub(/:.*$/, "", path)
                if (path in appended_lines_counts) {
                    printf "ERROR: The same file %s given to the '-A' option multiple times\n",
                        path > "/dev/stderr"
                    return 1
                }

                clear_array(tmp_lines)
                code = ARGV[i]
                sub(/^[^:]+:/, "", code)
                appended_lines_counts[path] = split(code, tmp_lines, "\n")
                for (j = appended_lines_counts[path]; j > 0; j--)
                    appended_lines[path,j] = tmp_lines[j]
            }
        } else if (ARGV[i] ~ /^-(-help|h)$/ ) {
            print_usage()
            return 0
        } else if (ARGV[i] ~ /^--output-dir=/) {
            output_dir = ARGV[i]
            sub(/^--output-dir=/, "", output_dir)
        } else if (ARGV[i] ~ /^--write-js-deps$/) {
            write_js_deps = true
        } else if (ARGV[i] ~ /^--write-html-deps$/) {
            write_html_deps = true
        } else if (ARGV[i] ~ /^--output=files-to-copy$/) {
            output = "files-to-copy"
        } else if (ARGV[i] ~ /^--output=amalgamate-js:/) {
            output = "amalgamate-js"
            js_to_amalgamate = ARGV[i]
            sub(/^--output=amalgamate-js:/, "", js_to_amalgamate)
            if (js_to_amalgamate !~ path_re) {
                printf "ERROR: amalgamate-js path does not match '%s': %s\n",
                    path_re, js_to_amalgamate > "/dev/stderr"
                return 1
            }
        } else {
            printf "ERROR: Unknown option '%s'\n", ARGV[i] > "/dev/stderr"
            print_usage()
            return 1
        }
    }

    # With nothing requested explicitly, fall back to the default manifest.
    if (is_empty(explicitly_requested) && output != "amalgamate-js") {
        explicitly_requested["manifest.json"]
        manifests_to_process["manifest.json"]
    }

    for (path in manifests_to_process) {
        if (process_file(path, path, "manifest"))
            return 1
    }
    for (path in html_to_process) {
        if (process_file(path, path, "html"))
            return 1
    }
    for (path in js_to_process) {
        if (process_file(path, path, "js"))
            return 1
    }

    for (path in lines_count) {
        # Write only files that were requested explicitly or whose kind was
        # enabled with --write-js-deps / --write-html-deps.
        if (!(path in explicitly_requested) &&
            !(modes[path] == "js" && write_js_deps) &&
            !(modes[path] == "html" && write_html_deps))
            continue

        dir = path
        sub(/[^/]*$/, "", dir)
        dir = output_dir "/" dir
        # Escape EVERY single quote so that the directory name stays inside
        # the single-quoted shell word below; escaping only the first one
        # would let a later quote terminate the word.
        gsub("'", "'\\''", dir)

        system("mkdir -p '" dir "'")

        out_file = output_dir "/" path

        # Truncate the file first so that it exists even when it has no lines.
        printf "" > out_file

        max_line_nr = lines_count[path]
        for (i = 1; i <= max_line_nr; i++)
            print lines[path, i] >> out_file

        # Release the descriptor; otherwise one stays open per written file.
        close(out_file)
    }

    if (output == "files-to-copy") {
        for (path in to_copy)
            print path
    }

    if (output == "amalgamate-js") {
        if (print_amalgamation())
            return 1
    }

    return 0
}

# Run the program; the value returned by main() becomes the exit status.
BEGIN { exit main() }