#!/usr/bin/awk -f
#
# SPDX-License-Identifier: CC0-1.0
#
# Process javascript files and resolve dependencies between them
#
# This file is part of Haketilo
#
# Copyright (C) 2021, Wojtek Kosior
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the CC0 1.0 Universal License as published by
# the Creative Commons Corporation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# CC0 1.0 Universal License for more details.

# Named boolean constants used throughout the script.
BEGIN {
    true = 1
    false = 0
}

# Regular expressions describing identifiers, paths and the argument syntax of
# every supported '#' directive. directive_args_patterns is indexed by the
# directive name (without the leading '#').
BEGIN {
    identifier_re = "[_a-zA-Z][_a-zA-Z0-9]*"
    path_dir_re = "([-_a-zA-Z0-9][-._a-zA-Z0-9]*/)*"
    path_ext_re = "(\\.[-_.a-zA-Z0-9]*)?"
    path_re_noanchor = path_dir_re identifier_re path_ext_re
    path_re = "^" path_re_noanchor "$"

    if_clause_re = "!?" identifier_re
    if_AND_re = "([[:space:]]+&&[[:space:]]+" if_clause_re ")*"
    if_OR_re = "([[:space:]]+[|][|][[:space:]]+" if_clause_re ")*"

    directive_args_patterns["IF"] = ("^" if_clause_re \
                                     "(" if_AND_re "|" if_OR_re ")$")
    directive_args_patterns["ENDIF"] = "^$"
    directive_args_patterns["ELSE"] = "^$"
    # #ELIF conditions are evaluated by the same if_condition_true() as #IF
    # ones, so they have to obey the same syntax.
    directive_args_patterns["ELIF"] = directive_args_patterns["IF"]
    directive_args_patterns["DEFINE"] = "^" identifier_re "$"
    directive_args_patterns["UNDEF"] = "^" identifier_re "$"
    directive_args_patterns["ERROR"] = "^.*$"
    # The key has to equal the directive name recognized in process_file().
    directive_args_patterns["COPY_FILE"] = "^[^[:space:]]+$"
    directive_args_patterns["INCLUDE"] = "^[^[:space:]]+$"
    directive_args_patterns["INCLUDE_VERBATIM"] = "^[^[:space:]]+$"

    AS_re = "AS[[:space:]]+" identifier_re
    maybe_AS_re = "([[:space:]]+" AS_re ")?"

    FROM_clause_re = identifier_re maybe_AS_re
    more_FROM_clauses_re = "([[:space:]]*,[[:space:]]*" FROM_clause_re ")*"
    FROM_IMPORT_re = "[^[:space:]]+[[:space:]]+IMPORT[[:space:]]+"

    EXPORT_AS_re = ".*[^[:space:]][[:space:]]+" AS_re

    directive_args_patterns["IMPORT"] = "^[^[:space:]]+" maybe_AS_re "$"
    directive_args_patterns["FROM"] = ("^" FROM_IMPORT_re FROM_clause_re \
                                       more_FROM_clauses_re "$")
    directive_args_patterns["EXPORT"] = "^(" EXPORT_AS_re "|" identifier_re ")$"

    directive_args_patterns["LOADJS"] = "^[^[:space:]]+$"
    directive_args_patterns["LOADCSS"] = "^[^[:space:]]+$"
    directive_args_patterns["LOADHTML"] = "^[^[:space:]]+$"
}

# Check `path` (found on `line` of file `read_path`) against path_re.
# Return 0 when it matches; print an error and return 1 otherwise.
function validate_path(read_path, path, line) {
    if (path !~ (path_re)) {
        printf "ERROR: File path in %s does not match '%s': %s\n",
            read_path, path_re, line > "/dev/stderr"
        return 1
    }

    return 0
}

# Strip the directory part and the extension from `path`.
function identifier_from_path(path) {
    sub("^" path_dir_re, "", path)
    sub(path_ext_re "$", "", path)

    return path
}

# Return what follows the last whitespace character of `line`.
function last_token(line) {
    sub("^.*[[:space:]]", "", line)

    return line
}

# Return what precedes the first whitespace character of `line`.
function first_token(line) {
    sub("[[:space:]].*$", "", line)

    return line
}

# Return true when `array` has no elements. `key` is a local variable.
function is_empty(array,
                  key) {
    for (key in array)
        return false

    return true
}

# Remove all elements of `array`. `key` is a local variable.
function clear_array(array,
                     key) {
    for (key in array)
        delete array[key]
}

# Append `line` to the processed output of `path`. The optional `where`
# restricts the line to one destination: "amalgamation_root_file" lines only go
# to main_js_lines (and only when `path` is js_to_amalgamate),
# "non_amalgamation_file" lines only go to the per-file `lines` array.
function add_line(path, line, where) {
    if (where != "amalgamation_root_file")
        lines[path,++lines_count[path]] = line

    if (where != "non_amalgamation_file" && path == js_to_amalgamate)
        main_js_lines[++main_js_lines_count] = line
}

# Scripts already loaded into the HTML page that is currently being processed.
BEGIN {
    delete page_js_deps[0]
    page_js_deps_count = 0
}

# Preprocess one file.
#
# path       - the file the produced lines get attributed to
# read_path  - the file that is actually read; differs from `path` when
#              processing an #INCLUDE'd file
# mode       - "js", "html" or "manifest"; decides which directives are legal
#
# The remaining parameters are local variables.
# Return 0 on success and 1 (after printing a message to stderr) on error.
function process_file(path, read_path, mode,
                      line, result, line_part, directive, directive_args,
                      if_nesting, if_nesting_true, if_branch_processed) {
    if (path in modes && modes[path] != mode) {
        printf "ERROR: File %s used multiple times in different contexts\n",
            path > "/dev/stderr"
        return 1
    }

    # A new HTML page starts with no scripts/stylesheets loaded.
    if (mode == "html" && path == read_path) {
        clear_array(page_js_deps)
        page_js_deps_count = 0
        clear_array(page_css_deps)
    }

    modes[path] = mode

    if (!(path in reading)) {
        # An entry in lines_count means the file has already been processed.
        if (path in lines_count)
            return 0
        lines_count[path]
    }

    reading[read_path]

    if (mode == "js" && path == read_path) {
        add_line(path, "\"use strict\";")
        add_line(path, "this.haketilo_exports = this.haketilo_exports || {};")
        add_line(path, "this.haketilo_exports[\"" path "\"] = {};")
        add_line(path, "window.globalThis = this", "amalgamation_root_file")
        add_line(path, "")
        add_line(path, "(function() {", "non_amalgamation_file")
        add_line(path, "var globalThis = this.haketilo_this",
                 "non_amalgamation_file")
        add_line(path, "{", "non_amalgamation_file")
    }

    while (true) {
        result = (getline line < read_path)
        if (result < 0) {
            printf "ERROR: Could not read %s\n", read_path > "/dev/stderr"
            return 1
        }
        if (result == 0) {
            # End of file; continue with lines supplied through '-A', if any.
            if (!(path in appended_lines_counts) || \
                additional_line_nr[path] == appended_lines_counts[path])
                break

            line = appended_lines[path,++additional_line_nr[path]]
        }

        # Ordinary (non-directive) line.
        if (line !~ /^#/) {
            if (if_nesting_true == if_nesting)
                add_line(path, line)
            continue
        }

        # Join directive lines continued with a trailing backslash.
        while (line ~ /\\$/) {
            sub(/\\$/, "", line)

            result = (getline line_part < read_path)
            if (result < 0) {
                printf "ERROR: Could not read %s\n", read_path > "/dev/stderr"
                return 1
            }
            if (result == 0) {
                if (path in appended_lines_counts && \
                    additional_line_nr[path] < appended_lines_counts[path]) {
                    line_part = appended_lines[path,++additional_line_nr[path]]
                } else {
                    printf "ERROR: Unexpected EOF in %s\n",
                        read_path > "/dev/stderr"
                    return 1
                }
            }

            line = line " " line_part
        }

        directive = substr(line, 2)
        sub(/[[:space:]].*$/, "", directive)

        if (directive !~ \
            /^(IF|ENDIF|ELSE|ELIF|DEFINE|UNDEF|ERROR|INCLUDE|INCLUDE_VERBATIM|COPY_FILE)$/ &&
            (mode != "js" || directive !~ /^(IMPORT|FROM|EXPORT)$/) &&
            (mode != "html" || directive !~ /^(LOADJS|LOADCSS)$/) &&
            (mode != "manifest" || directive !~ /^(LOADJS|LOADHTML)$/)) {
            printf "ERROR: Invalid # directive in %s: %s\n",
                read_path, line > "/dev/stderr"
            return 1
        }

        directive_args = line
        sub(/^#[^[:space:]]*[[:space:]]*/, "", directive_args)
        sub(/[[:space:]]*$/, "", directive_args)

        if (directive_args !~ directive_args_patterns[directive]) {
            printf "ERROR: #%s arguments in %s do not match '%s': %s\n",
                directive, read_path, directive_args_patterns[directive], line \
                > "/dev/stderr"
            return 1
        }

        # if_nesting counts open #IFs, if_nesting_true counts those of them
        # whose currently processed branch is active.
        if (directive == "IF") {
            if (if_nesting_true == if_nesting) {
                if (if_condition_true(directive_args, path))
                    if_nesting_true++
                else
                    if_branch_processed = false
            }

            if_nesting++
        } else if (directive == "ENDIF") {
            if (if_nesting == 0) {
                printf "ERROR: Spurious #ENDIF in %s\n",
                    read_path > "/dev/stderr"
                return 1
            }

            if (if_nesting_true == if_nesting)
                if_nesting_true--

            if_nesting--
        } else if (directive == "ELSE") {
            if (if_nesting == 0) {
                printf "ERROR: Spurious #ELSE in %s\n",
                    read_path > "/dev/stderr"
                return 1
            }

            if (if_nesting == if_nesting_true + 1 && !if_branch_processed) {
                if_nesting_true++
            } else if (if_nesting == if_nesting_true) {
                if_branch_processed = true
                if_nesting_true--
            }
        } else if (directive == "ELIF") {
            if (if_nesting == 0) {
                printf "ERROR: Spurious #ELIF in %s\n",
                    read_path > "/dev/stderr"
                return 1
            }

            if (if_nesting == if_nesting_true + 1 && !if_branch_processed &&
                if_condition_true(directive_args, path)) {
                if_nesting_true++
            } else if (if_nesting == if_nesting_true) {
                if_branch_processed = true
                if_nesting_true--
            }
        } else if (if_nesting_true != if_nesting) {
            # Inside an inactive branch all other directives are skipped.
            continue
        } else if (directive == "DEFINE") {
            defines[path,directive_args]
        } else if (directive == "UNDEF") {
            delete defines[path,directive_args]
        } else if (directive == "ERROR") {
            printf "ERROR: File %s says: %s\n",
                read_path, directive_args > "/dev/stderr"
            return 1
        } else if (directive == "INCLUDE") {
            if (include_file(path, read_path, directive_args, line))
                return 1
        } else if (directive == "INCLUDE_VERBATIM") {
            if (include_file(path, read_path, directive_args, line, true))
                return 1
        } else if (directive == "COPY_FILE") {
            if (mark_copy_file(path, read_path, directive_args, line))
                return 1
        } else if (directive == "IMPORT") {
            if (import_js_file(path, read_path, directive_args, line))
                return 1
        } else if (directive == "FROM") {
            if (import_from_js_file(path, read_path, directive_args, line))
                return 1
        } else if (directive == "EXPORT") {
            if (export_from_js_file(path, read_path, directive_args, line))
                return 1
        } else if (directive == "LOADJS") {
            if (mode == "html") {
                page_js_deps_count = \
                    load_js_file(path, read_path, directive_args, line,
                                 page_js_deps, page_js_deps_count)
                if (page_js_deps_count < 1)
                    return 1
            } else if (mode == "manifest") {
                if (load_js_file(path, read_path, directive_args, line) < 1)
                    return 1
            }
        } else if (directive == "LOADCSS") {
            if (load_css_file(path, read_path, directive_args, line,
                              page_css_deps))
                return 1
        } else if (directive == "LOADHTML") {
            if (load_html_file(path, read_path, directive_args, line))
                return 1
        }
    }

    close(read_path)

    if (if_nesting) {
        printf "ERROR: Unterminated #IF in %s\n", read_path > "/dev/stderr"
        return 1
    }

    if (mode == "js" && path == read_path) {
        add_line(path, "}", "non_amalgamation_file")
        add_line(path, "}).call({", "non_amalgamation_file")
        add_line(path, "    haketilo_exports:  this.haketilo_exports,",
                 "non_amalgamation_file")
        add_line(path, "    haketilo_this:     this", "non_amalgamation_file")
        add_line(path, "});", "non_amalgamation_file")
    }

    delete reading[read_path]

    return 0
}

# Evaluate the condition of an #IF or #ELIF directive. Each clause is an
# identifier, optionally negated with '!'; an identifier is considered set when
# it is defined globally (the '-D' option) or with #DEFINE in file `path`.
# Clauses are combined left to right; once a '||' operator has been seen all
# subsequent clauses are OR'ed, otherwise they are AND'ed.
# All parameters after `path` are local variables.
function if_condition_true(directive_args, path,
                           result, bool, first_iter, word, negated, alt) {
    first_iter = true

    while (directive_args) {
        word = first_token(directive_args)
        # Drop the clause, peek at the operator that follows, drop it as well.
        sub(/^[^[:space:]]+[[:space:]]*/, "", directive_args)
        alt = alt || directive_args ~ /^[|][|]/
        sub(/^[^[:space:]]+[[:space:]]*/, "", directive_args)

        negated = word ~ /^!/
        sub(/^!/, "", word)
        bool = (word in defines || (path,word) in defines) != negated

        if (first_iter) {
            result = bool
            first_iter = false
            continue
        }

        if (alt)
            result = result || bool
        else # the operator was '&&'
            result = result && bool
    }

    return result
}

# Handle #INCLUDE and #INCLUDE_VERBATIM. With `verbatim` set the lines of
# `included_path` are copied unmodified, otherwise the file is preprocessed in
# the mode of `root_path`. `read_line` and `result` are local variables.
# Return 0 on success, 1 on error.
function include_file(root_path, read_path, included_path, line, verbatim,
                      read_line, result) {
    if (included_path in reading) {
        printf "ERROR: Inclusion loop when including %s in %s\n",
            included_path, read_path > "/dev/stderr"
        return 1
    }

    if (verbatim) {
        while (true) {
            result = (getline read_line < included_path)
            if (result > 0)
                add_line(root_path, read_line)
            else
                break
        }

        # Close the file on the error path too.
        close(included_path)

        if (result == 0)
            return 0

        printf "ERROR: Could not read %s\n", included_path > "/dev/stderr"
    } else {
        if (process_file(root_path, included_path, modes[root_path]) == 0)
            return 0
    }

    printf "       when including %s in %s\n",
        included_path, read_path > "/dev/stderr"

    return 1
}

# Handle #COPY_FILE: validate the path and record it in to_copy.
function mark_copy_file(root_path, read_path, copied_path, line) {
    if (validate_path(read_path, copied_path, line))
        return 1

    to_copy[copied_path]

    return 0
}

# Make `what` exported by `imported_path` (or the entire exports object of that
# file when `what` is empty) available under the name `as` in `root_path`, and
# process the imported file. Parameters after `what` are local variables.
# Return 0 on success, 1 on error.
function satisfy_import(root_path, imported_path, as, what,
                        added_line, description, count) {
    if ((root_path,as) in imports_from) {
        printf "ERROR: Multiple items imported under the name '%s' in %s\n",
            as, root_path > "/dev/stderr"
        return 1
    }

    added_line = " " as " = haketilo_exports[\"" imported_path "\"]"
    if (what)
        added_line = added_line "." what
    add_line(root_path, "const" added_line ";", "non_amalgamation_file")
    add_line(root_path, "let" added_line ";", "amalgamation_root_file")

    count = ++import_counts[root_path]
    imports_as  [root_path,count] = as
    imports_from[root_path,as]    = imported_path
    imports_what[root_path,as]    = what

    if (what)
        description = "'" what "' from " imported_path
    else
        description = imported_path

    description = description " needed by " root_path

    if (imported_path in reading) {
        printf "ERROR: dependency loop when importing %s\n",
            description > "/dev/stderr"
        return 1
    } else if (process_file(imported_path, imported_path, "js")) {
        printf "       when importing %s\n", description > "/dev/stderr"
        return 1
    }

    if (what && !((imported_path,what) in exports)) {
        printf "ERROR: %s doesn't export '%s' needed by %s\n",
            imported_path, what, root_path > "/dev/stderr"
        return 1
    }

    return 0
}

# Handle '#IMPORT path [AS name]'. Without an AS clause the import is named
# after the file (path with directory and extension stripped).
# `imported_path` and `as` are local variables.
function import_js_file(root_path, read_path, directive_args, line,
                        imported_path, as) {
    imported_path = first_token(directive_args)
    if (validate_path(read_path, imported_path, line))
        return 1

    # Look at the trimmed arguments rather than the raw line; the latter may
    # carry trailing whitespace that would hide the AS clause.
    if (directive_args ~ ("[[:space:]]" AS_re "$"))
        as = last_token(directive_args)
    else
        as = identifier_from_path(imported_path)

    return satisfy_import(root_path, imported_path, as)
}

# Handle '#FROM path IMPORT item [AS name], ...'.
# Parameters after `line` are local variables.
function import_from_js_file(root_path, read_path, directive_args, line,
                             imported_path, args_copy, FROM_clause, as) {
    imported_path = first_token(directive_args)
    if (validate_path(read_path, imported_path, line))
        return 1

    args_copy = directive_args
    sub("^" FROM_IMPORT_re, "", args_copy)
    # Prepend a comma so that every clause, the first one included, is
    # preceded by one.
    args_copy = "," args_copy

    while (args_copy ~ /,/) {
        sub(/^[^,]*,[[:space:]]*/, "", args_copy)
        FROM_clause = args_copy
        sub(/[[:space:]]*,.*$/, "", FROM_clause)

        if (satisfy_import(root_path, imported_path,
                           last_token(FROM_clause), first_token(FROM_clause)))
            return 1
    }

    return 0
}

# Handle '#EXPORT identifier' and '#EXPORT expression AS identifier'.
# Parameters after `line` are local variables.
function export_from_js_file(root_path, read_path, directive_args, line,
                             as, exported_item, added_line) {
    as = last_token(directive_args)

    if (directive_args ~ ("^" identifier_re "$")) {
        exported_item = as
    } else {
        exported_item = directive_args
        sub("[[:space:]]+" AS_re "$", "", exported_item)
    }

    if ((root_path,as) in exports) {
        printf "ERROR: Multiple values exported under the name '%s' in %s\n",
            as, root_path > "/dev/stderr"
        return 1
    }

    added_line = \
        "this.haketilo_exports[\"" root_path "\"]." as " = (" exported_item ");"
    add_line(root_path, added_line)

    exports[root_path,as]

    return 0
}

# Append `js_path` and its not yet listed (transitive) imports to the
# `dependencies` array, imports first. `count` is the number of elements
# already in `dependencies`, `dependencies_added` is the set of paths already
# listed. Parameters after `dependencies_added` are local variables.
# Return the new element count, or 0 on error.
function compute_deps(js_path, dependencies, count, dependencies_added,
                      i_max, i, as, next_path) {
    # Make sure the (possibly omitted) argument is treated as an array.
    delete dependencies_added[0]

    if (process_file(js_path, js_path, "js"))
        return 0

    i_max = import_counts[js_path]
    for (i = 1; i <= i_max; i++) {
        as = imports_as[js_path,i]
        next_path = imports_from[js_path,as]
        if (next_path in dependencies_added)
            continue

        count = compute_deps(next_path, dependencies, count,
                             dependencies_added)
        if (count < 1)
            return 0
    }

    dependencies_added[js_path]
    dependencies[++count] = js_path

    return count
}

# Here js_deps and js_deps_count are optional args, used when loading scripts
# into an HTML page to avoid having the same script loaded twice in multiple
# places.
# Parameters after `js_deps_count` are local variables.
# Return the new number of elements in js_deps, or 0 on error.
function load_js_file(root_path, read_path, loaded_path, line,
                      js_deps, js_deps_count,
                      js_deps_already_added, i, added_line) {
    # Make sure the (possibly omitted) arrays are treated as arrays.
    delete js_deps[""]
    delete js_deps_already_added[0]

    if (validate_path(read_path, loaded_path, line))
        return 0

    for (i = 1; i <= js_deps_count; i++)
        js_deps_already_added[js_deps[i]]

    i = js_deps_count

    js_deps_count = compute_deps(loaded_path, js_deps, js_deps_count,
                                 js_deps_already_added)

    if (js_deps_count < 1) {
        printf "       when loading %s from %s\n",
            loaded_path, read_path > "/dev/stderr"
        return 0
    }

    # Only emit the scripts that were added by this call.
    while (++i <= js_deps_count) {
        if (modes[root_path] == "html") {
            # NOTE(review): the tag markup below was reconstructed (an empty
            # string used to be emitted here, so loaded scripts never made it
            # into the page) - confirm it against the expected HTML output.
            added_line = "<script src=\"/" js_deps[i] "\"></script>"
        } else { #if (modes[root_path] == "manifest") {
            added_line = "\"" js_deps[i] "\""
            if (i != js_deps_count)
                added_line = added_line ","
        }
        add_line(root_path, added_line)
    }

    return js_deps_count
}

# css_deps is an array used to avoid having the same stylesheet loaded twice in
# multiple places in a single HTML page.
# Handle #LOADCSS: mark the stylesheet for copying and reference it from the
# page, unless this page (as tracked by the css_deps set) already loads it.
# `added_line` is a local variable. Return 0 on success, 1 on error.
function load_css_file(root_path, read_path, loaded_path, line, css_deps,
                       added_line) {
    # Make sure the (possibly omitted) argument is treated as an array.
    delete css_deps[""]

    if (validate_path(read_path, loaded_path, line))
        return 1

    if (!(loaded_path in css_deps)) {
        css_deps[loaded_path]
        to_copy[loaded_path]
        # NOTE(review): the tag markup below was reconstructed (an empty
        # string used to be emitted here, so stylesheets never made it into
        # the page) - confirm it against the expected HTML output.
        added_line = ("<link rel=\"stylesheet\" href=\"/" loaded_path "\" />")
        add_line(root_path, added_line)
    }

    return 0
}

# Handle #LOADHTML: validate the path and process the page it names.
# Return 0 on success, 1 on error.
function load_html_file(root_path, read_path, loaded_path, line) {
    if (validate_path(read_path, loaded_path, line))
        return 1

    if (process_file(loaded_path, loaded_path, "html")) {
        printf "       when loading %s from %s\n",
            loaded_path, read_path > "/dev/stderr"
        return 1
    }

    return 0
}

# Print the amalgamated script to stdout: all dependencies of js_to_amalgamate
# followed by the lines collected for that file in main_js_lines.
# All parameters are local variables. Return 0 on success, 1 on error.
function print_amalgamation(js_deps, js_deps_count,
                            js_dep_nr, path, max_line_nr, line_nr) {
    # Make sure js_deps is treated as an array.
    delete js_deps[0]

    js_deps_count = compute_deps(js_to_amalgamate, js_deps, 0)
    if (js_deps_count < 1)
        return 1

    # '<' instead of '<=' because we print the main js file below instead
    for (js_dep_nr = 1; js_dep_nr < js_deps_count; js_dep_nr++) {
        path = js_deps[js_dep_nr]
        max_line_nr = lines_count[path]

        for (line_nr = 1; line_nr <= max_line_nr; line_nr++)
            print lines[path, line_nr]
    }

    for (line_nr = 1; line_nr <= main_js_lines_count; line_nr++)
        print main_js_lines[line_nr]

    return 0
}

# Print a summary of command line options to stderr.
function print_usage() {
    printf "USAGE:  %s compute_scripts.awk -- [-D PREPROCESSOR_DEFINITION]... [-M manifest/to/process/manifest.json]... [-H html/to/process.html]... [-J js/to/process.js]... [-A file/to/append/to.js:appended_code]... [--help|-h] [--output-dir=./build] [--write-js-deps] [--write-html-deps] [--output=files-to-copy|--output=amalgamate-js:js/to/process.js]\n",
        ARGV[0] > "/dev/stderr"
}

# Patterns the arguments of single-letter options have to match.
BEGIN {
    option_arg_patterns["D"] = "^" identifier_re "$"
    option_arg_patterns["M"] = path_re
    option_arg_patterns["H"] = path_re
    option_arg_patterns["J"] = path_re
    option_arg_patterns["A"] = "^" path_re_noanchor ":"
}

# Program entry point: parse the command line, process the requested files,
# write the results under output_dir and print what '--output' asks for.
# All parameters are local variables. The return value is the exit status.
function main(i, j, path, letter, dir, max_line_nr, js_deps, js_deps_count,
              code, tmp_lines, out_path) {
    output_dir = "./build"
    write_js_deps = false
    write_html_deps = false

    # The 'delete' statements make awk treat these variables as arrays.
    delete appended_lines[0]
    delete appended_lines_counts[0]
    delete tmp_lines[0]

    delete lines[0]
    delete lines_count[0]

    output = ""
    js_to_amalgamate = ""
    delete main_js_lines[0]

    delete manifests_to_process[0]
    delete html_to_process[0]
    delete js_to_process[0]

    delete explicitly_requested[0]

    for (i = 1; i < ARGC; i++) {
        if (ARGV[i] ~ /^-[DMHJA]$/) {
            letter = substr(ARGV[i++], 2)
            if (i == ARGC || ARGV[i] !~ option_arg_patterns[letter]) {
                printf "ERROR: '-%s' option should be followed by an argument matching '%s'\n",
                    letter, option_arg_patterns[letter] > "/dev/stderr"
                return 1
            }

            if (letter == "D")
                defines[ARGV[i]]
            else
                explicitly_requested[ARGV[i]]

            if (letter == "M")
                manifests_to_process[ARGV[i]]
            if (letter == "H")
                html_to_process[ARGV[i]]
            if (letter == "J")
                js_to_process[ARGV[i]]
            if (letter == "A") {
                # The argument has the form 'path:code'.
                path = ARGV[i]
                sub(/:.*$/, "", path)
                if (path in appended_lines_counts) {
                    printf "ERROR: The same file %s given to the '-A' option multiple times\n",
                        path > "/dev/stderr"
                    return 1
                }

                clear_array(tmp_lines)
                code = ARGV[i]
                sub(/^[^:]+:/, "", code)
                appended_lines_counts[path] = split(code, tmp_lines, "\n")
                for (j = appended_lines_counts[path]; j > 0; j--)
                    appended_lines[path,j] = tmp_lines[j]
            }
        } else if (ARGV[i] ~ /^-(-help|h)$/ ) {
            print_usage()
            return 0
        } else if (ARGV[i] ~ /^--output-dir=/) {
            output_dir = ARGV[i]
            sub(/^--output-dir=/, "", output_dir)
        } else if (ARGV[i] ~ /^--write-js-deps$/) {
            write_js_deps = true
        } else if (ARGV[i] ~ /^--write-html-deps$/) {
            write_html_deps = true
        } else if (ARGV[i] ~ /^--output=files-to-copy$/) {
            output = "files-to-copy"
        } else if (ARGV[i] ~ /^--output=amalgamate-js:/) {
            output = "amalgamate-js"
            js_to_amalgamate = ARGV[i]
            sub(/^--output=amalgamate-js:/, "", js_to_amalgamate)
            if (js_to_amalgamate !~ path_re) {
                printf "ERROR: amalgamate-js path does not match '%s': %s\n",
                    path_re, js_to_amalgamate > "/dev/stderr"
                return 1
            }
        } else {
            printf "ERROR: Unknown option '%s'\n", ARGV[i] > "/dev/stderr"
            print_usage()
            return 1
        }
    }

    # With nothing requested explicitly, default to processing manifest.json.
    if (is_empty(explicitly_requested) && output != "amalgamate-js") {
        explicitly_requested["manifest.json"]
        manifests_to_process["manifest.json"]
    }

    for (path in manifests_to_process) {
        if (process_file(path, path, "manifest"))
            return 1
    }
    for (path in html_to_process) {
        if (process_file(path, path, "html"))
            return 1
    }
    for (path in js_to_process) {
        if (process_file(path, path, "js"))
            return 1
    }

    for (path in lines_count) {
        if (!(path in explicitly_requested) &&
            !(modes[path] == "js" && write_js_deps) &&
            !(modes[path] == "html" && write_html_deps))
            continue

        dir = path
        sub(/[^/]*$/, "", dir)
        dir = output_dir "/" dir

        # Escape *every* single quote, since the name is pasted into a
        # single-quoted shell word; '--' guards against a leading dash.
        gsub("'", "'\\''", dir)
        if (system("mkdir -p -- '" dir "'") != 0) {
            printf "ERROR: Could not create directory for %s\n",
                path > "/dev/stderr"
            return 1
        }

        out_path = output_dir "/" path

        # Truncate the file first, so that it exists even if it has no lines.
        printf "" > out_path

        max_line_nr = lines_count[path]
        for (i = 1; i <= max_line_nr; i++)
            print lines[path, i] >> out_path

        # Don't keep a descriptor open for every file written.
        close(out_path)
    }

    if (output == "files-to-copy") {
        for (path in to_copy)
            print path
    }

    if (output == "amalgamate-js") {
        if (print_amalgamation())
            return 1
    }

    return 0
}

BEGIN {
    exit main()
}