aboutsummaryrefslogtreecommitdiff
path: root/background/ResponseMetaData.mjs
diff options
context:
space:
mode:
Diffstat (limited to 'background/ResponseMetaData.mjs')
-rw-r--r--background/ResponseMetaData.mjs107
1 files changed, 107 insertions, 0 deletions
diff --git a/background/ResponseMetaData.mjs b/background/ResponseMetaData.mjs
new file mode 100644
index 0000000..345fc54
--- /dev/null
+++ b/background/ResponseMetaData.mjs
@@ -0,0 +1,107 @@
+/**
+* GNU LibreJS - A browser add-on to block nonfree nontrivial JavaScript.
+*
+* Copyright (C) 2018 Giorgio Maone <giorgio@maone.net>
+*
+* This file is part of GNU LibreJS.
+*
+* GNU LibreJS is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* GNU LibreJS is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with GNU LibreJS. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/**
+ This class parses HTTP response headers to extract both the
+ MIME Content-type and the character set to be used, if specified,
+ to parse textual data through a decoder.
+*/
+
+"use strict";
+
+const BOM = [0xEF, 0xBB, 0xBF];
+const DECODER_PARAMS = {stream: true};
+
+class ResponseMetaData {
+ constructor(request) {
+ let {responseHeaders} = request;
+ this.headers = {};
+ for (let h of responseHeaders) {
+ if (/^\s*Content-(Type|Disposition)\s*$/i.test(h.name)) {
+ let propertyName = h.name.split("-")[1].trim();
+ propertyName = `content${propertyName.charAt(0).toUpperCase()}${propertyName.substring(1).toLowerCase()}`;
+ this[propertyName] = h.value;
+ this.headers[propertyName] = h;
+ }
+ }
+ this.computedCharset = "";
+ }
+
+ get charset() {
+ let charset = "";
+ if (this.contentType) {
+ let m = this.contentType.match(/;\s*charset\s*=\s*(\S+)/);
+ if (m) {
+ charset = m[1];
+ }
+ }
+ Object.defineProperty(this, "charset", { value: charset, writable: false, configurable: true });
+ return this.computedCharset = charset;
+ }
+
+ decode(data) {
+ let charset = this.charset;
+ let decoder = this.createDecoder();
+ let text = decoder.decode(data, DECODER_PARAMS);
+ if (!charset && /html/i.test(this.contentType)) {
+ // missing HTTP charset, sniffing in content...
+
+ if (data[0] === BOM[0] && data[1] === BOM[1] && data[2] === BOM[2]) {
+ // forced UTF-8, nothing to do
+ return text;
+ }
+
+ // let's try figuring out the charset from <meta> tags
+ let parser = new DOMParser();
+ let doc = parser.parseFromString(text, "text/html");
+ let meta = doc.querySelectorAll('meta[charset], meta[http-equiv="content-type"], meta[content*="charset"]');
+ for (let m of meta) {
+ charset = m.getAttribute("charset");
+ if (!charset) {
+ let match = m.getAttribute("content").match(/;\s*charset\s*=\s*([\w-]+)/i)
+ if (match) charset = match[1];
+ }
+ if (charset) {
+ decoder = this.createDecoder(charset, null);
+ if (decoder) {
+ this.computedCharset = charset;
+ return decoder.decode(data, DECODER_PARAMS);
+ }
+ }
+ }
+ }
+ return text;
+ }
+
+ createDecoder(charset = this.charset, def = "latin1") {
+ if (charset) {
+ try {
+ return new TextDecoder(charset);
+ } catch (e) {
+ console.error(e);
+ }
+ }
+ return def ? new TextDecoder(def) : null;
+ }
+};
+ResponseMetaData.UTF8BOM = new Uint8Array(BOM);
+
+export default ResponseMetaData;