#!/usr/bin/awk -f
#
# SPDX-License-Identifier: CC0-1.0
#
# Process javascript files and resolve dependencies between them
#
# This file is part of Haketilo
#
# Copyright (C) 2021, Wojtek Kosior
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the CC0 1.0 Universal License as published by
# the Creative Commons Corporation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# CC0 1.0 Universal License for more details.
# awk has no boolean literals; define readable names for 1 and 0.
BEGIN {
    false = 0
    true = 1
}
# Regular expressions used to validate file paths and the arguments of each
# preprocessor directive.  directive_args_patterns is indexed by directive
# name (without the leading '#'); process_file() matches the trimmed argument
# string of a directive against the corresponding entry.
BEGIN {
    identifier_re = "[_a-zA-Z][_a-zA-Z0-9]*"

    # A path is: zero or more directory components, a file name that starts
    # like an identifier and an optional dot-introduced extension.
    path_dir_re = "([-_a-zA-Z0-9][-._a-zA-Z0-9]*/)*"
    path_ext_re = "(\\.[-_.a-zA-Z0-9]*)?"
    path_re_noanchor = path_dir_re identifier_re path_ext_re
    path_re = "^" path_re_noanchor "$"

    # A condition is one optionally '!'-negated identifier followed by
    # further clauses joined either only with '&&' or only with '||'.
    if_clause_re = "!?" identifier_re
    if_AND_re = "([[:space:]]+&&[[:space:]]+" if_clause_re ")*"
    if_OR_re = "([[:space:]]+[|][|][[:space:]]+" if_clause_re ")*"

    directive_args_patterns["IF"] = ("^" if_clause_re \
                                     "(" if_AND_re "|" if_OR_re ")$")
    directive_args_patterns["ENDIF"] = "^$"
    directive_args_patterns["ELSE"] = "^$"
    # #ELIF conditions are evaluated by if_condition_true() exactly like #IF
    # conditions, so they have to be validated against the same grammar.
    directive_args_patterns["ELIF"] = directive_args_patterns["IF"]
    directive_args_patterns["DEFINE"] = "^" identifier_re "$"
    directive_args_patterns["UNDEF"] = "^" identifier_re "$"
    directive_args_patterns["ERROR"] = "^.*$"
    # NOTE(review): process_file() does not accept a directive named COPY;
    # this entry is kept only so nothing that may rely on it breaks.
    directive_args_patterns["COPY"] = "^[^[:space:]]+$"
    # The directive accepted by process_file() is COPY_FILE; without this
    # entry its arguments would be matched against an empty (always matching)
    # pattern.
    directive_args_patterns["COPY_FILE"] = "^[^[:space:]]+$"
    directive_args_patterns["INCLUDE"] = "^[^[:space:]]+$"
    directive_args_patterns["INCLUDE_VERBATIM"] = "^[^[:space:]]+$"

    # Building blocks for the javascript import/export directives.
    AS_re = "AS[[:space:]]+" identifier_re
    maybe_AS_re = "([[:space:]]+" AS_re ")?"
    FROM_clause_re = identifier_re maybe_AS_re
    more_FROM_clauses_re = "([[:space:]]*,[[:space:]]*" FROM_clause_re ")*"
    FROM_IMPORT_re = "[^[:space:]]+[[:space:]]+IMPORT[[:space:]]+"
    EXPORT_AS_re = ".*[^[:space:]][[:space:]]+" AS_re

    directive_args_patterns["IMPORT"] = "^[^[:space:]]+" maybe_AS_re "$"
    directive_args_patterns["FROM"] = ("^" FROM_IMPORT_re FROM_clause_re \
                                       more_FROM_clauses_re "$")
    directive_args_patterns["EXPORT"] = "^(" EXPORT_AS_re "|" identifier_re ")$"

    directive_args_patterns["LOADJS"] = "^[^[:space:]]+$"
    directive_args_patterns["LOADCSS"] = "^[^[:space:]]+$"
    directive_args_patterns["LOADHTML"] = "^[^[:space:]]+$"
}
# Check that 'path' matches path_re.
#
# read_path and line are only used in the error message: they name the file
# being read and the offending line.
#
# Returns 0 when the path is acceptable; otherwise prints an error to stderr
# and returns 1.
function validate_path(read_path, path, line) {
    if (path ~ (path_re))
        return 0

    printf "ERROR: File path in %s does not match '%s': %s\n",
        read_path, path_re, line > "/dev/stderr"
    return 1
}
# Derive a name from a file path by dropping the leading directory components
# and then the trailing extension, e.g. "a/b/foo.js" gives "foo".
function identifier_from_path(path) {
    # Leading directories go first ...
    sub(("^" path_dir_re), "", path)
    # ... then everything from the first dot of the file name onwards.
    sub((path_ext_re "$"), "", path)

    return path
}
# Return what follows the last whitespace character of 'line' (the whole line
# if it contains no whitespace).
function last_token(line) {
    # The greedy '.*' makes the match extend up to the last whitespace.
    sub(/^.*[[:space:]]/, "", line)

    return line
}
# Return what precedes the first whitespace character of 'line' (the whole
# line if it contains no whitespace).
function first_token(line) {
    # Cut everything starting at the first whitespace character.
    sub(/[[:space:]].*$/, "", line)

    return line
}
# Tell whether 'array' has no elements.  'key' is a local variable.
function is_empty(array, key) {
    for (key in array) {
        # At least one element exists.
        return false
    }

    return true
}
# Remove every element of 'array'.  'key' is kept in the signature as a
# (now unused) local variable.
function clear_array(array, key) {
    # Splitting an empty string deletes all elements of the target array.
    split("", array)
}
# Append an output line for file 'path'.
#
# where  optional; "amalgamation_root_file" keeps the line out of the per-file
#        output, "non_amalgamation_file" keeps it out of the amalgamated
#        output.  With any other value the line may go to both.
#
# Lines reach main_js_lines only when 'path' is the file chosen for
# amalgamation (js_to_amalgamate).
function add_line(path, line, where) {
    if (where != "amalgamation_root_file") {
        lines_count[path]++
        lines[path, lines_count[path]] = line
    }

    if (path != js_to_amalgamate || where == "non_amalgamation_file")
        return

    main_js_lines_count++
    main_js_lines[main_js_lines_count] = line
}
# State shared by the #LOADJS handling of the HTML page being processed.
BEGIN {
    page_js_deps_count = 0
    # Deleting a (nonexistent) element makes awk treat the name as an array.
    delete page_js_deps[0]
}
# Preprocess one file, storing the resulting lines with add_line().
#
# path       name under which output lines, the mode, defines, imports and
#            exports are recorded; it differs from read_path when read_path
#            is being #INCLUDEd into 'path'
# read_path  file that is actually read
# mode       "js", "html" or "manifest"; decides which mode-specific
#            directives are accepted
#
# The remaining parameters are local variables.  if_nesting counts the open
# #IF blocks, if_nesting_true counts how many of them (from the outside) are
# currently active; lines and most directives only take effect while both are
# equal.
#
# Returns 0 on success and 1 after printing an error message to stderr.
function process_file(path, read_path, mode,
                      line, result, line_part, directive, directive_args,
                      if_nesting, if_nesting_true, if_branch_processed) {
    if (path in modes && modes[path] != mode) {
        printf "ERROR: File %s used multiple times in different contexts\n",
            path > "/dev/stderr"
        return 1
    }

    # A root HTML file starts with empty per-page dependency lists.
    if (mode == "html" && path == read_path) {
        clear_array(page_js_deps)
        page_js_deps_count = 0
        clear_array(page_css_deps)
    }

    modes[path] = mode

    if (!(path in reading)) {
        # Not in the middle of reading 'path': if it already has an entry in
        # lines_count it has been handled before.
        if (path in lines_count)
            return 0
        lines_count[path]
    }

    reading[read_path]

    # Prologue of a root javascript file.
    if (mode == "js" && path == read_path) {
        add_line(path, "\"use strict\";")
        add_line(path, "this.haketilo_exports = this.haketilo_exports || {};")
        add_line(path, "this.haketilo_exports[\"" path "\"] = {};")
        add_line(path, "window.globalThis = this", "amalgamation_root_file")
        add_line(path, "")
        add_line(path, "(function() {", "non_amalgamation_file")
        add_line(path, "var globalThis = this.haketilo_this",
                 "non_amalgamation_file")
        add_line(path, "{", "non_amalgamation_file")
    }

    while (true) {
        result = (getline line < read_path)
        if (result < 0) {
            printf "ERROR: Could not read %s\n", read_path > "/dev/stderr"
            return 1
        }
        if (result == 0) {
            # End of file.  Lines supplied with '-A' belong at the very end
            # of the root file, so they must not be consumed when it is only
            # an #INCLUDEd file that has run out.
            if (read_path != path ||
                !(path in appended_lines_counts) ||
                additional_line_nr[path] == appended_lines_counts[path])
                break

            line = appended_lines[path,++additional_line_nr[path]]
        }

        # Ordinary line: copy it if we are in an active branch.
        if (line !~ /^#/) {
            if (if_nesting_true == if_nesting)
                add_line(path, line)
            continue
        }

        # Directive line: join backslash-continued lines with single spaces.
        while (line ~ /\\$/) {
            sub(/\\$/, "", line)

            result = (getline line_part < read_path)
            if (result < 0) {
                printf "ERROR: Could not read %s\n", read_path > "/dev/stderr"
                return 1
            }
            if (result == 0) {
                if (path == read_path &&
                    (path in appended_lines_counts) &&
                    additional_line_nr[path] < appended_lines_counts[path]) {
                    line_part = appended_lines[path,++additional_line_nr[path]]
                } else {
                    printf "ERROR: Unexpected EOF in %s\n",
                        read_path > "/dev/stderr"
                    return 1
                }
            }

            line = line " " line_part
        }

        # The directive name is what follows '#' up to the first whitespace.
        directive = substr(line, 2)
        sub(/[[:space:]].*$/, "", directive)

        if (directive !~ \
            /^(IF|ENDIF|ELSE|ELIF|DEFINE|UNDEF|ERROR|INCLUDE|INCLUDE_VERBATIM|COPY_FILE)$/ &&
            (mode != "js" || directive !~ /^(IMPORT|FROM|EXPORT)$/) &&
            (mode != "html" || directive !~ /^(LOADJS|LOADCSS)$/) &&
            (mode != "manifest" || directive !~ /^(LOADJS|LOADHTML)$/)) {
            printf "ERROR: Invalid # directive in %s: %s\n",
                read_path, line > "/dev/stderr"
            return 1
        }

        # Arguments: the rest of the line with surrounding whitespace removed.
        directive_args = line
        sub(/^#[^[:space:]]*[[:space:]]*/, "", directive_args)
        sub(/[[:space:]]*$/, "", directive_args)

        if (directive_args !~ directive_args_patterns[directive]) {
            printf "ERROR: #%s arguments in %s do not match '%s': %s\n",
                directive, read_path, directive_args_patterns[directive], line \
                > "/dev/stderr"
            return 1
        }

        # Conditional directives are handled even inside inactive branches so
        # that the nesting is tracked; everything else is skipped there.
        if (directive == "IF") {
            if (if_nesting_true == if_nesting) {
                if (if_condition_true(directive_args, path))
                    if_nesting_true++
                else
                    if_branch_processed = false
            }

            if_nesting++
        } else if (directive == "ENDIF") {
            if (if_nesting == 0) {
                printf "ERROR: Spurious #ENDIF in %s\n",
                    read_path > "/dev/stderr"
                return 1
            }

            if (if_nesting_true == if_nesting)
                if_nesting_true--

            if_nesting--
        } else if (directive == "ELSE") {
            if (if_nesting == 0) {
                printf "ERROR: Spurious #ELSE in %s\n",
                    read_path > "/dev/stderr"
                return 1
            }

            if (if_nesting == if_nesting_true + 1 && !if_branch_processed) {
                # No earlier branch of this #IF was taken: take this one.
                if_nesting_true++
            } else if (if_nesting == if_nesting_true) {
                # Leaving the branch that was taken.
                if_branch_processed = true
                if_nesting_true--
            }
        } else if (directive == "ELIF") {
            if (if_nesting == 0) {
                printf "ERROR: Spurious #ELIF in %s\n",
                    read_path > "/dev/stderr"
                return 1
            }

            if (if_nesting == if_nesting_true + 1 && !if_branch_processed &&
                if_condition_true(directive_args, path)) {
                if_nesting_true++
            } else if (if_nesting == if_nesting_true) {
                if_branch_processed = true
                if_nesting_true--
            }
        } else if (if_nesting_true != if_nesting) {
            continue
        } else if (directive == "DEFINE") {
            defines[path,directive_args]
        } else if (directive == "UNDEF") {
            delete defines[path,directive_args]
        } else if (directive == "ERROR") {
            printf "ERROR: File %s says: %s\n",
                read_path, directive_args > "/dev/stderr"
            return 1
        } else if (directive == "INCLUDE") {
            if (include_file(path, read_path, directive_args, line))
                return 1
        } else if (directive == "INCLUDE_VERBATIM") {
            if (include_file(path, read_path, directive_args, line, true))
                return 1
        } else if (directive == "COPY_FILE") {
            if (mark_copy_file(path, read_path, directive_args, line))
                return 1
        } else if (directive == "IMPORT") {
            if (import_js_file(path, read_path, directive_args, line))
                return 1
        } else if (directive == "FROM") {
            if (import_from_js_file(path, read_path, directive_args, line))
                return 1
        } else if (directive == "EXPORT") {
            if (export_from_js_file(path, read_path, directive_args, line))
                return 1
        } else if (directive == "LOADJS") {
            if (mode == "html") {
                page_js_deps_count = \
                    load_js_file(path, read_path, directive_args, line,
                                 page_js_deps, page_js_deps_count)
                if (page_js_deps_count < 1)
                    return 1
            } else if (mode == "manifest") {
                if (load_js_file(path, read_path, directive_args, line) < 1)
                    return 1
            }
        } else if (directive == "LOADCSS") {
            if (load_css_file(path, read_path, directive_args, line,
                              page_css_deps))
                return 1
        } else if (directive == "LOADHTML") {
            if (load_html_file(path, read_path, directive_args, line))
                return 1
        }
    }

    close(read_path)

    if (if_nesting) {
        printf "ERROR: Unterminated #IF in %s\n", read_path > "/dev/stderr"
        return 1
    }

    # Epilogue matching the prologue of a root javascript file.
    if (mode == "js" && path == read_path) {
        add_line(path, "}", "non_amalgamation_file")
        add_line(path, "}).call({", "non_amalgamation_file")
        add_line(path, " haketilo_exports: this.haketilo_exports,",
                 "non_amalgamation_file")
        add_line(path, " haketilo_this: this",
                 "non_amalgamation_file")
        add_line(path, "});", "non_amalgamation_file")
    }

    delete reading[read_path]

    return 0
}
# Evaluate the (already validated) condition of an #IF or #ELIF directive.
#
# directive_args  clauses of the form [!]NAME joined with '&&' or '||'
# path            file whose own definitions are consulted in addition to the
#                 global ones
#
# A NAME counts as set when it is an index of 'defines' either on its own or
# together with 'path'.  The remaining parameters are local variables.
#
# Returns the truth value of the whole condition.
function if_condition_true(directive_args, path,
                           result, bool, first_iter, word, negated, alt) {
    first_iter = true

    while (directive_args) {
        # Split off the next clause ...
        word = first_token(directive_args)
        sub(/^[^[:space:]]+[[:space:]]*/, "", directive_args)

        # ... and the operator following it, remembering whether an '||' has
        # been seen so far.
        alt = alt || directive_args ~ /^[|][|]/
        sub(/^[^[:space:]]+[[:space:]]*/, "", directive_args)

        negated = word ~ /^!/
        sub(/^!/, "", word)

        bool = (word in defines || (path,word) in defines) != negated

        if (first_iter) {
            result = bool
            first_iter = false
        } else if (alt) {
            result = result || bool
        } else {
            # No '||' seen, so the clauses are joined with '&&'.
            result = result && bool
        }
    }

    return result
}
# Handle #INCLUDE and #INCLUDE_VERBATIM.
#
# root_path      file the included content is added to
# read_path      file containing the directive (used in messages)
# included_path  file to include
# line           the directive line (used in messages)
# verbatim       when true the file is copied line by line; otherwise it is
#                preprocessed with the mode of root_path
#
# read_line and result are local variables.
#
# Returns 0 on success and 1 after printing an error to stderr.
function include_file(root_path, read_path, included_path, line, verbatim,
                      read_line, result) {
    if (validate_path(read_path, included_path, line))
        return 1

    if (included_path in reading) {
        printf "ERROR: Inclusion loop when including %s in %s\n",
            included_path, read_path > "/dev/stderr"
        return 1
    }

    if (!verbatim) {
        if (process_file(root_path, included_path, modes[root_path]) == 0)
            return 0
    } else {
        # Copy lines until end of file (0) or a read error (negative).
        while ((result = (getline read_line < included_path)) > 0)
            add_line(root_path, read_line)

        if (result == 0) {
            close(included_path)
            return 0
        }

        printf "ERROR: Could not read %s\n", included_path > "/dev/stderr"
    }

    # Common trailer for both kinds of failure.
    printf " when including %s in %s\n",
        included_path, read_path > "/dev/stderr"

    return 1
}
# Handle #COPY_FILE: remember copied_path in to_copy.
#
# read_path and line are used for the error message of validate_path();
# root_path is not used.
#
# Returns 0 on success and 1 if the path is malformed.
function mark_copy_file(root_path, read_path, copied_path, line) {
    if (validate_path(read_path, copied_path, line) == 0) {
        to_copy[copied_path]
        return 0
    }

    return 1
}
# Make an item exported by imported_path available in root_path.
#
# root_path      javascript file doing the import
# imported_path  javascript file being imported
# as             name the imported value gets in root_path
# what           name of the single export to import; when empty the whole
#                exports object of imported_path is imported
#
# added_line, description and count are local variables.
#
# The import is recorded in imports_as, imports_from and imports_what and the
# imported file is processed.  Returns 0 on success and 1 after printing an
# error to stderr.
function satisfy_import(root_path, imported_path, as, what,
                        added_line, description, count) {
    if ((root_path,as) in imports_from) {
        printf "ERROR: Multiple items imported under the name '%s' in %s\n",
            as, root_path > "/dev/stderr"
        return 1
    }

    # Build the declaration and the text used in error messages together.
    added_line = " " as " = haketilo_exports[\"" imported_path "\"]"
    description = imported_path
    if (what) {
        added_line = added_line "." what
        description = "'" what "' from " imported_path
    }
    description = description " needed by " root_path

    add_line(root_path, "const" added_line ";", "non_amalgamation_file")
    add_line(root_path, "let" added_line ";", "amalgamation_root_file")

    count = ++import_counts[root_path]
    imports_as  [root_path,count] = as
    imports_from[root_path,as] = imported_path
    imports_what[root_path,as] = what

    if (imported_path in reading) {
        printf "ERROR: dependency loop when importing %s\n",
            description > "/dev/stderr"
        return 1
    }

    if (process_file(imported_path, imported_path, "js")) {
        printf " when importing %s\n", description > "/dev/stderr"
        return 1
    }

    # A named import must correspond to an actual export.
    if (!what || ((imported_path,what) in exports))
        return 0

    printf "ERROR: %s doesn't export '%s' needed by %s\n",
        imported_path, what, root_path > "/dev/stderr"

    return 1
}
# Handle '#IMPORT path [AS name]'.
#
# root_path       javascript file doing the import
# read_path       file containing the directive (used in messages)
# directive_args  directive arguments with surrounding whitespace removed
# line            the raw directive line (used in messages)
#
# imported_path and as are local variables.
#
# Without an AS clause the import is named after the imported file (see
# identifier_from_path()).  Returns what satisfy_import() returns, or 1 if
# the path is malformed.
function import_js_file(root_path, read_path, directive_args, line,
                        imported_path, as) {
    imported_path = first_token(directive_args)
    if (validate_path(read_path, imported_path, line))
        return 1

    # Look for the AS clause in the trimmed arguments rather than in the raw
    # line: the raw line may end with whitespace, which would hide the clause
    # and silently discard the requested name.
    if (directive_args ~ ("[[:space:]]" AS_re "$"))
        as = last_token(directive_args)
    else
        as = identifier_from_path(imported_path)

    return satisfy_import(root_path, imported_path, as)
}
# Handle '#FROM path IMPORT item [AS name], ...'.
#
# root_path       javascript file doing the import
# read_path       file containing the directive (used in messages)
# directive_args  directive arguments with surrounding whitespace removed
# line            the raw directive line (used in messages)
#
# The remaining parameters are local variables.
#
# Returns 0 when every clause could be satisfied and 1 otherwise.
function import_from_js_file(root_path, read_path, directive_args, line,
                             imported_path, args_copy, FROM_clause, as) {
    imported_path = first_token(directive_args)
    if (validate_path(read_path, imported_path, line))
        return 1

    # Keep only the clause list and prefix it with a comma so that every
    # clause, including the first one, is preceded by one.
    args_copy = directive_args
    sub("^" FROM_IMPORT_re, "", args_copy)
    args_copy = "," args_copy

    do {
        # Drop the leading comma, then isolate the clause up to the next one.
        sub(/^[^,]*,[[:space:]]*/, "", args_copy)
        FROM_clause = args_copy
        sub(/[[:space:]]*,.*$/, "", FROM_clause)

        # In 'item AS name' the last token is the local name and the first
        # the imported item; in a bare 'item' both are the same.
        as = last_token(FROM_clause)
        if (satisfy_import(root_path, imported_path,
                           as, first_token(FROM_clause)))
            return 1
    } while (args_copy ~ /,/)

    return 0
}
# Handle '#EXPORT name' and '#EXPORT expression AS name'.
#
# root_path       javascript file doing the export
# directive_args  directive arguments with surrounding whitespace removed
# read_path and line are not used.
#
# as, exported_item and added_line are local variables.
#
# Adds an assignment to the file's exports object and records the name in
# 'exports'.  Returns 0 on success and 1 if the name is already exported.
function export_from_js_file(root_path, read_path, directive_args, line,
                             as, exported_item, added_line) {
    as = last_token(directive_args)

    # For a bare identifier the exported expression is the identifier
    # itself; otherwise it is everything before the trailing AS clause.
    exported_item = directive_args
    if (directive_args !~ ("^" identifier_re "$"))
        sub("[[:space:]]+" AS_re "$", "", exported_item)

    if ((root_path,as) in exports) {
        printf "ERROR: Multiple values exported under the name '%s' in %s\n",
            as, root_path > "/dev/stderr"
        return 1
    }

    added_line = "this.haketilo_exports[\"" root_path "\"]." as \
        " = (" exported_item ");"
    add_line(root_path, added_line)

    exports[root_path,as]

    return 0
}
# Append js_path and everything it imports (recursively) to 'dependencies',
# imported files first.
#
# js_path             javascript file to process
# dependencies        array receiving the paths at indexes count+1, count+2...
# count               number of entries already present in 'dependencies'
# dependencies_added  optional set (array indexes) of paths already present
#                     in 'dependencies'; such paths are not added again
#
# i_max, i, as and next_path are local variables.
#
# Returns the new number of entries, or 0 if processing a file failed.
function compute_deps(js_path, dependencies, count, dependencies_added,
                      i_max, i, as, next_path) {
    # Make sure dependencies_added is an array even if it was not passed.
    delete dependencies_added[0]

    # The recursion below skips imports that are already listed, but the
    # file requested by the caller has to be checked too; otherwise asking
    # for the same script twice would list it twice.
    if (js_path in dependencies_added)
        return count

    if (process_file(js_path, js_path, "js"))
        return 0

    i_max = import_counts[js_path]
    for (i = 1; i <= i_max; i++) {
        as = imports_as[js_path,i]
        next_path = imports_from[js_path,as]
        if (next_path in dependencies_added)
            continue

        count = compute_deps(next_path, dependencies, count, dependencies_added)
        if (count < 1)
            return 0
    }

    dependencies_added[js_path]
    dependencies[++count] = js_path

    return count
}
# Handle #LOADJS: emit one output line for every script that loaded_path
# needs (including itself) and that is not listed yet.
#
# js_deps and js_deps_count are optional arguments.  They are passed when
# loading scripts into an HTML page, so that a script already listed for the
# page is not listed again by a later directive.
#
# js_deps_already_added, i and added_line are local variables.
#
# Returns the new number of entries in js_deps, or 0 after an error.
function load_js_file(root_path, read_path, loaded_path, line,
                      js_deps, js_deps_count,
                      js_deps_already_added, i, added_line) {
    # Make sure both names are arrays even when js_deps was not passed.
    delete js_deps[""]
    delete js_deps_already_added[0]

    if (validate_path(read_path, loaded_path, line))
        return 0

    for (i = 1; i <= js_deps_count; i++)
        js_deps_already_added[js_deps[i]]

    # Index of the first entry that compute_deps() may add.
    i = js_deps_count + 1

    js_deps_count = compute_deps(loaded_path, js_deps,
                                 js_deps_count, js_deps_already_added)

    if (js_deps_count < 1) {
        printf " when loading %s from %s\n",
            loaded_path, read_path > "/dev/stderr"
        return 0
    }

    for (; i <= js_deps_count; i++) {
        if (modes[root_path] != "html") {
            # Otherwise the mode is "manifest": a quoted path, followed by a
            # comma unless it is the last one.
            added_line = "\"" js_deps[i] "\"" (i != js_deps_count ? "," : "")
        } else {
            # NOTE(review): an empty line is emitted for HTML pages;
            # presumably markup referencing js_deps[i] was intended here -
            # confirm.
            added_line = ""
        }

        add_line(root_path, added_line)
    }

    return js_deps_count
}
# Handle #LOADCSS.
#
# css_deps is an array used to avoid having the same stylesheet loaded twice
# in multiple places in a single HTML page: its indexes are the stylesheets
# already loaded.
#
# added_line is a local variable (it used to leak into the global namespace).
#
# A stylesheet seen for the first time is also recorded in to_copy.
# Returns 0 on success and 1 if the path is malformed.
function load_css_file(root_path, read_path, loaded_path, line, css_deps,
                       added_line) {
    # Make sure css_deps is an array.
    delete css_deps[""]

    if (validate_path(read_path, loaded_path, line))
        return 1

    if (!(loaded_path in css_deps)) {
        css_deps[loaded_path]
        to_copy[loaded_path]
        # NOTE(review): an empty line is emitted; presumably markup
        # referencing loaded_path was intended here - confirm.
        added_line = ("")
        add_line(root_path, added_line)
    }

    return 0
}
# Handle #LOADHTML: preprocess loaded_path as an HTML file.
#
# read_path and line are used in error messages; root_path is not used.
#
# Returns 0 on success and 1 after printing an error to stderr.
function load_html_file(root_path, read_path, loaded_path, line) {
    if (validate_path(read_path, loaded_path, line))
        return 1

    if (process_file(loaded_path, loaded_path, "html")) {
        # The format has exactly two conversions, so only two values are
        # passed (a surplus third argument used to be given).
        printf " when loading %s from %s\n",
            loaded_path, read_path > "/dev/stderr"
        return 1
    }

    return 0
}
# Print js_to_amalgamate together with all its dependencies as one script.
#
# All parameters are local variables.
#
# Returns 0 on success and 1 if the dependencies could not be computed.
function print_amalgamation(js_deps, js_deps_count,
                            js_dep_nr, path, max_line_nr, line_nr) {
    delete js_deps[0]

    js_deps_count = compute_deps(js_to_amalgamate, js_deps, 0)
    if (js_deps_count < 1)
        return 1

    # The last dependency is the main file itself; it is printed separately
    # below from main_js_lines, hence '<' rather than '<='.
    js_dep_nr = 0
    while (++js_dep_nr < js_deps_count) {
        path = js_deps[js_dep_nr]
        max_line_nr = lines_count[path]

        line_nr = 0
        while (++line_nr <= max_line_nr)
            print lines[path, line_nr]
    }

    line_nr = 0
    while (++line_nr <= main_js_lines_count)
        print main_js_lines[line_nr]

    return 0
}
# Print a one-line summary of the command line options to stderr.
function print_usage() {
    printf("USAGE: %s compute_scripts.awk --" \
           " [-D PREPROCESSOR_DEFINITION]..." \
           " [-M manifest/to/process/manifest.json]..." \
           " [-H html/to/process.html]..." \
           " [-J js/to/process.js]..." \
           " [-A file/to/append/to.js:appended_code]..." \
           " [--help|-h]" \
           " [--output-dir=./build]" \
           " [--write-js-deps]" \
           " [--write-html-deps]" \
           " [--output=files-to-copy|--output=amalgamate-js:js/to/process.js]\n",
           ARGV[0]) > "/dev/stderr"
}
# Patterns that the argument of each single-letter command line option has to
# match, indexed by the option letter.
BEGIN {
    # -D takes a preprocessor definition name.
    option_arg_patterns["D"] = "^" identifier_re "$"

    # -M, -H and -J take a file path.
    option_arg_patterns["M"] = path_re
    option_arg_patterns["H"] = path_re
    option_arg_patterns["J"] = path_re

    # -A takes a file path followed by a colon (and the code to append).
    option_arg_patterns["A"] = "^" path_re_noanchor ":"
}
# Program entry point: parse the command line, process the requested files,
# write the results below the output directory and print what was asked for
# with '--output='.
#
# All parameters are local variables (js_deps and js_deps_count are unused).
#
# Returns the exit status: 0 on success, 1 after an error.
function main(i, j, path, letter, dir, max_line_nr, js_deps, js_deps_count,
              code, tmp_lines) {
    output_dir = "./build"
    write_js_deps = false
    write_html_deps = false

    # Deleting a (nonexistent) element makes awk treat each name as an array.
    delete appended_lines[0]
    delete appended_lines_counts[0]
    delete tmp_lines[0]
    delete lines[0]
    delete lines_count[0]

    output = ""
    js_to_amalgamate = ""
    delete main_js_lines[0]
    delete manifests_to_process[0]
    delete html_to_process[0]
    delete js_to_process[0]
    delete explicitly_requested[0]

    for (i = 1; i < ARGC; i++) {
        if (ARGV[i] ~ /^-[DMHJA]$/) {
            # Options taking an argument; 'i' moves on to the argument.
            letter = substr(ARGV[i++], 2)
            if (i == ARGC || ARGV[i] !~ option_arg_patterns[letter]) {
                printf "ERROR: '-%s' option should be followed by an argument matching '%s'\n",
                    letter, option_arg_patterns[letter] > "/dev/stderr"
                return 1
            }

            # Only -M, -H and -J name files to be written out.  The argument
            # of -A has the form 'path:code' and is not a file name, so it
            # must not count as an explicitly requested file (doing so used
            # to suppress the default manifest.json).
            if (letter == "D") {
                defines[ARGV[i]]
            } else if (letter == "M") {
                explicitly_requested[ARGV[i]]
                manifests_to_process[ARGV[i]]
            } else if (letter == "H") {
                explicitly_requested[ARGV[i]]
                html_to_process[ARGV[i]]
            } else if (letter == "J") {
                explicitly_requested[ARGV[i]]
                js_to_process[ARGV[i]]
            } else { # letter == "A"
                path = ARGV[i]
                sub(/:.*$/, "", path)
                if (path in appended_lines_counts) {
                    printf "ERROR: The same file %s given to the '-A' option multiple times\n",
                        path > "/dev/stderr"
                    return 1
                }

                # Store the code to append line by line.
                clear_array(tmp_lines)
                code = ARGV[i]
                sub(/^[^:]+:/, "", code)
                appended_lines_counts[path] = split(code, tmp_lines, "\n")
                for (j = appended_lines_counts[path]; j > 0; j--)
                    appended_lines[path,j] = tmp_lines[j]
            }
        } else if (ARGV[i] ~ /^-(-help|h)$/ ) {
            print_usage()
            return 0
        } else if (ARGV[i] ~ /^--output-dir=/) {
            output_dir = ARGV[i]
            sub(/^--output-dir=/, "", output_dir)
        } else if (ARGV[i] ~ /^--write-js-deps$/) {
            write_js_deps = true
        } else if (ARGV[i] ~ /^--write-html-deps$/) {
            write_html_deps = true
        } else if (ARGV[i] ~ /^--output=files-to-copy$/) {
            output = "files-to-copy"
        } else if (ARGV[i] ~ /^--output=amalgamate-js:/) {
            output = "amalgamate-js"
            js_to_amalgamate = ARGV[i]
            sub(/^--output=amalgamate-js:/, "", js_to_amalgamate)
            if (js_to_amalgamate !~ path_re) {
                printf "ERROR: amalgamate-js path does not match '%s': %s\n",
                    path_re, js_to_amalgamate > "/dev/stderr"
                return 1
            }
        } else {
            printf "ERROR: Unknown option '%s'\n", ARGV[i] > "/dev/stderr"
            print_usage()
            return 1
        }
    }

    # Without any file requested, process manifest.json (unless the only job
    # is to amalgamate a script).
    if (is_empty(explicitly_requested) && output != "amalgamate-js") {
        explicitly_requested["manifest.json"]
        manifests_to_process["manifest.json"]
    }

    for (path in manifests_to_process) {
        if (process_file(path, path, "manifest"))
            return 1
    }
    for (path in html_to_process) {
        if (process_file(path, path, "html"))
            return 1
    }
    for (path in js_to_process) {
        if (process_file(path, path, "js"))
            return 1
    }

    # Write out the processed files that were requested.
    for (path in lines_count) {
        if (!(path in explicitly_requested) &&
            !(modes[path] == "js" && write_js_deps) &&
            !(modes[path] == "html" && write_html_deps))
            continue

        dir = path
        sub(/[^/]*$/, "", dir)
        dir = output_dir "/" dir

        # Quote the directory for the shell.  Every single quote has to be
        # escaped (output_dir is arbitrary), and '--' keeps a name starting
        # with '-' from being taken for an option.
        gsub("'", "'\\''", dir)
        if (system("mkdir -p -- '" dir "'") != 0) {
            printf "ERROR: Could not create directory for %s\n",
                (output_dir "/" path) > "/dev/stderr"
            return 1
        }

        # Truncate the file first so that it exists even with no lines.
        printf "" > (output_dir "/" path)

        max_line_nr = lines_count[path]
        for (i = 1; i <= max_line_nr; i++)
            print lines[path, i] >> (output_dir "/" path)

        # Do not keep one open stream per written file.
        close(output_dir "/" path)
    }

    if (output == "files-to-copy") {
        for (path in to_copy)
            print path
    }

    if (output == "amalgamate-js") {
        if (print_amalgamation())
            return 1
    }

    return 0
}
# Run the program; the value returned by main() becomes the exit status.
BEGIN {
    exit(main())
}