From 01937dc9d5215ef96ce756e3ccda51bf29032f58 Mon Sep 17 00:00:00 2001 From: Wojtek Kosior Date: Mon, 10 May 2021 18:07:05 +0200 Subject: initial commit --- background/ResponseMetaData.mjs | 107 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) create mode 100644 background/ResponseMetaData.mjs (limited to 'background/ResponseMetaData.mjs') diff --git a/background/ResponseMetaData.mjs b/background/ResponseMetaData.mjs new file mode 100644 index 0000000..345fc54 --- /dev/null +++ b/background/ResponseMetaData.mjs @@ -0,0 +1,107 @@ +/** +* GNU LibreJS - A browser add-on to block nonfree nontrivial JavaScript. +* +* Copyright (C) 2018 Giorgio Maone +* +* This file is part of GNU LibreJS. +* +* GNU LibreJS is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. +* +* GNU LibreJS is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with GNU LibreJS. If not, see <https://www.gnu.org/licenses/>. +*/ + +/** + This class parses HTTP response headers to extract both the + MIME Content-type and the character set to be used, if specified, + to parse textual data through a decoder. 
/**
 * ResponseMetaData module: Content-Type / charset extraction and decoding.
 */

"use strict";

// Byte sequence of the UTF-8 byte order mark.
const BOM = [0xEF, 0xBB, 0xBF];
// Options passed to every TextDecoder#decode() call made by this module.
const DECODER_PARAMS = {stream: true};

class ResponseMetaData {
  /**
   * Collects the Content-Type and Content-Disposition headers of a response.
   *
   * Each matching header is exposed twice: its value as `this.contentType` /
   * `this.contentDisposition`, and the full header object under the same key
   * in `this.headers`.
   *
   * @param {object} request - object carrying a `responseHeaders` array of
   *   `{name, value}` entries; a missing array is treated as empty.
   */
  constructor(request) {
    const responseHeaders = request.responseHeaders || [];
    this.headers = {};
    for (const header of responseHeaders) {
      if (/^\s*Content-(Type|Disposition)\s*$/i.test(header.name)) {
        const suffix = header.name.split("-")[1].trim();
        // Normalize to "contentType" / "contentDisposition" whatever the
        // casing of the original header name was.
        const propertyName = `content${suffix.charAt(0).toUpperCase()}${suffix.substring(1).toLowerCase()}`;
        this[propertyName] = header.value;
        this.headers[propertyName] = header;
      }
    }
    this.computedCharset = "";
  }

  /**
   * The charset declared in the Content-Type header, or "" when absent.
   *
   * The parameter name is matched case-insensitively and an optional
   * double-quoted value is unwrapped, so `Charset="utf-8"` yields `utf-8`.
   * The result is memoized as an own property on first access and is also
   * copied to `computedCharset`.
   *
   * @returns {string}
   */
  get charset() {
    let charset = "";
    if (this.contentType) {
      const m = this.contentType.match(/;\s*charset\s*=\s*"?([^\s;"]+)/i);
      if (m) {
        charset = m[1];
      }
    }
    // Shadow the prototype getter so the header is parsed only once.
    Object.defineProperty(this, "charset", {value: charset, writable: false, configurable: true});
    return this.computedCharset = charset;
  }

  /**
   * Decodes response bytes to text.
   *
   * The charset from the HTTP header is used when present (falling back to
   * latin1). For HTML responses without an HTTP charset the content is
   * sniffed: a leading UTF-8 BOM forces UTF-8, otherwise the first <meta>
   * declaration naming a charset TextDecoder accepts wins. Whenever
   * sniffing picks a charset it is recorded in `computedCharset`.
   *
   * @param {Uint8Array} data - indexable byte view of the response body.
   * @returns {string} the decoded text.
   */
  decode(data) {
    const headerCharset = this.charset;
    const mustSniff = !headerCharset && /html/i.test(this.contentType);

    if (mustSniff && BOM.every((byte, i) => data[i] === byte)) {
      // The BOM forces UTF-8: decode accordingly instead of handing back
      // text produced by the latin1 fallback decoder.
      this.computedCharset = "utf-8";
      return new TextDecoder("utf-8").decode(data, DECODER_PARAMS);
    }

    const text = this.createDecoder().decode(data, DECODER_PARAMS);
    if (!mustSniff) {
      return text;
    }

    // Missing HTTP charset: try figuring it out from <meta> tags.
    const doc = new DOMParser().parseFromString(text, "text/html");
    const metas = doc.querySelectorAll('meta[charset], meta[http-equiv="content-type"], meta[content*="charset"]');
    for (const meta of metas) {
      let charset = meta.getAttribute("charset");
      if (!charset) {
        // A <meta http-equiv="content-type"> may lack a content attribute.
        const content = meta.getAttribute("content");
        const match = content && content.match(/;\s*charset\s*=\s*([\w-]+)/i);
        if (match) {
          charset = match[1];
        }
      }
      if (!charset) {
        continue;
      }
      // No default here: an unsupported label yields null so that the next
      // <meta> candidate gets a chance.
      const decoder = this.createDecoder(charset, null);
      if (decoder) {
        this.computedCharset = charset;
        return decoder.decode(data, DECODER_PARAMS);
      }
    }
    return text;
  }

  /**
   * Creates a TextDecoder for `charset`.
   *
   * If `charset` is empty or rejected by TextDecoder (the error is logged),
   * a decoder for `def` is returned instead, or null when `def` is falsy.
   *
   * @param {string} [charset=this.charset] - encoding label to try first.
   * @param {?string} [def="latin1"] - fallback encoding label.
   * @returns {?TextDecoder}
   */
  createDecoder(charset = this.charset, def = "latin1") {
    if (charset) {
      try {
        return new TextDecoder(charset);
      } catch (e) {
        console.error(e);
      }
    }
    return def ? new TextDecoder(def) : null;
  }
}
ResponseMetaData.UTF8BOM = new Uint8Array(BOM);

export default ResponseMetaData;
// -- cgit v1.2.3