From d911bf37a13a8bfddedd26b0e16d59e6fd4d229a Mon Sep 17 00:00:00 2001 From: Wojtek Kosior Date: Thu, 24 Mar 2022 21:55:51 +0100 Subject: fix encoding detection in StreamFilter --- background/stream_filter.js | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/background/stream_filter.js b/background/stream_filter.js index 921523a..b7879ea 100644 --- a/background/stream_filter.js +++ b/background/stream_filter.js @@ -35,7 +35,7 @@ function validate_encoding(charset) { try { - new TextDecoder(); + new TextDecoder(charset); return charset; } catch(e) { return undefined; @@ -44,7 +44,7 @@ function validate_encoding(charset) function is_content_type_header(header) { - header.name.toLowerCase().trim() === "content-type"; + return header.name.toLowerCase().trim() === "content-type"; } const charset_reg = /;\s*charset\s*=\s*([\w-]+)/i; @@ -55,7 +55,8 @@ function properties_from_headers(headers) for (const header of headers.filter(is_content_type_header)) { const match = charset_reg.exec(header.value); - if (!properties.detected_charset && validate_encoding(match[1])) + if (match && !properties.detected_charset && + validate_encoding(match[1])) properties.detected_charset = match[1]; if (/html/i.test(header.value)) @@ -105,7 +106,11 @@ function charset_from_meta_tags(doc) function create_decoder(properties, data) { let charset = charset_from_BOM(data) || properties.detected_charset; - if (!charset && data.indexOf(0) !== -1) { + + if (charset) + return new TextDecoder(charset); + + if (data.indexOf(0) !== -1) { console.warn("Haketilo: zeroes in bytestream, probable cached encoding mismatch. Trying to decode it as UTF-16.", properties); return new TextDecoder("utf-16be"); -- cgit v1.2.3