diff options
Diffstat (limited to 'vmime-master/src/vmime/wordEncoder.cpp')
-rw-r--r-- | vmime-master/src/vmime/wordEncoder.cpp | 320 |
1 files changed, 320 insertions, 0 deletions
diff --git a/vmime-master/src/vmime/wordEncoder.cpp b/vmime-master/src/vmime/wordEncoder.cpp new file mode 100644 index 0000000..4f47d04 --- /dev/null +++ b/vmime-master/src/vmime/wordEncoder.cpp @@ -0,0 +1,320 @@ +// +// VMime library (http://www.vmime.org) +// Copyright (C) 2002 Vincent Richard <vincent@vmime.org> +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 3 of +// the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this program; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Linking this library statically or dynamically with other modules is making +// a combined work based on this library. Thus, the terms and conditions of +// the GNU General Public License cover the whole combination. +// + +#include "vmime/wordEncoder.hpp" + +#include "vmime/exception.hpp" +#include "vmime/charsetConverter.hpp" + +#include "vmime/encoding.hpp" + +#include "vmime/utility/encoder/b64Encoder.hpp" +#include "vmime/utility/encoder/qpEncoder.hpp" + +#include "vmime/utility/stringUtils.hpp" + +#include "vmime/utility/outputStreamStringAdapter.hpp" +#include "vmime/utility/inputStreamStringAdapter.hpp" + + +namespace vmime { + + +wordEncoder::wordEncoder( + const string& buffer, + const charset& charset, + const Encoding encoding +) + : m_buffer(buffer), + m_pos(0), + m_length(buffer.length()), + m_charset(charset), + m_encoding(encoding) { + + try { + + string utf8Buffer; + + vmime::charset::convert( + buffer, utf8Buffer, charset, vmime::charset(charsets::UTF_8) + ); + + m_buffer = utf8Buffer; + m_length = utf8Buffer.length(); + + m_simple = false; + + } catch (exceptions::charset_conv_error&) { + + // Ignore exception. + // We will fall back on simple encoding. + m_simple = true; + } + + if (m_encoding == ENCODING_AUTO) { + m_encoding = guessBestEncoding(buffer, charset); + } + + if (m_encoding == ENCODING_B64) { + + m_encoder = make_shared <utility::encoder::b64Encoder>(); + + } else { // ENCODING_QP + + m_encoder = make_shared <utility::encoder::qpEncoder>(); + m_encoder->getProperties()["rfc2047"] = true; + } +} + + +static size_t getUTF8CharLength( + const string& buffer, + const size_t pos, + const size_t length +) { + + // Gives the number of extra bytes in a UTF8 char, given the leading char + static const unsigned char UTF8_EXTRA_BYTES[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5 + }; + + const unsigned char c = buffer[pos]; + const unsigned char n = UTF8_EXTRA_BYTES[c]; + + if (n < length - pos) { + return n + 1; + } else { + return 1; + } +} + + +const string wordEncoder::getNextChunk(const size_t maxLength) { + + const size_t remaining = m_length - m_pos; + + if (remaining == 0) { + return string(); + } + + vmime::string chunk; + vmime::utility::outputStreamStringAdapter chunkStream(chunk); + + // Simple encoding + if (m_simple) { + + // WARNING! Simple encoding can encode a non-integral number of + // characters and then may generate incorrectly-formed words! + + if (m_encoding == ENCODING_B64) { + + // Here, we have a formula to compute the maximum number of source + // bytes to encode knowing the maximum number of encoded chars. In + // Base64 encoding, 3 bytes of input provide 4 bytes of output. + const size_t inputCount = + std::min(remaining, (maxLength > 1) ? ((maxLength - 1) * 3) / 4 : 1); + + // Encode chunk + utility::inputStreamStringAdapter in(m_buffer, m_pos, m_pos + inputCount); + + m_encoder->encode(in, chunkStream); + m_pos += inputCount; + + } else { // ENCODING_QP + + // Compute exactly how much input bytes are needed to have an output + // string length of less than 'maxLength' bytes. In Quoted-Printable + // encoding, encoded bytes take 3 bytes. + size_t inputCount = 0; + size_t outputCount = 0; + + while ((inputCount == 0 || outputCount < maxLength) && (inputCount < remaining)) { + + const unsigned char c = m_buffer[m_pos + inputCount]; + + inputCount++; + outputCount += utility::encoder::qpEncoder::RFC2047_getEncodedLength(c); + } + + // Encode chunk + utility::inputStreamStringAdapter in(m_buffer, m_pos, m_pos + inputCount); + + m_encoder->encode(in, chunkStream); + m_pos += inputCount; + } + + // Fully RFC-compliant encoding + } else { + + shared_ptr <charsetConverter> conv = charsetConverter::create(charsets::UTF_8, m_charset); + + size_t inputCount = 0; + size_t outputCount = 0; + string encodeBuffer; + + while ((inputCount == 0 || outputCount < maxLength) && (inputCount < remaining)) { + + // Get the next UTF8 character + const size_t inputCharLength = + getUTF8CharLength(m_buffer, m_pos + inputCount, m_length); + + const string inputChar( + m_buffer.begin() + m_pos + inputCount, + m_buffer.begin() + m_pos + inputCount + inputCharLength + ); + + // Convert back to original encoding + string encodeBytes; + conv->convert(inputChar, encodeBytes); + + encodeBuffer += encodeBytes; + + // Compute number of output bytes + if (m_encoding == ENCODING_B64) { + + outputCount = std::max( + static_cast <size_t>(4), + (encodeBuffer.length() * 4) / 3 + ); + + } else { // ENCODING_QP + + for (size_t i = 0, n = encodeBytes.length() ; i < n ; ++i) { + + const unsigned char c = encodeBytes[i]; + outputCount += utility::encoder::qpEncoder::RFC2047_getEncodedLength(c); + } + } + + inputCount += inputCharLength; + } + + // Encode chunk + utility::inputStreamStringAdapter in(encodeBuffer); + + m_encoder->encode(in, chunkStream); + m_pos += inputCount; + } + + return chunk; +} + + +wordEncoder::Encoding wordEncoder::getEncoding() const { + + return m_encoding; +} + + +// static +bool wordEncoder::isEncodingNeeded( + const generationContext& ctx, + const string& buffer, + const charset& charset, + const string& lang +) { + + if (!ctx.getInternationalizedEmailSupport()) { + + // Charset-specific encoding + encoding recEncoding; + + if (charset.getRecommendedEncoding(recEncoding)) { + return true; + } + + // No encoding is needed if the buffer only contains ASCII chars + if (utility::stringUtils::findFirstNonASCIIchar(buffer.begin(), buffer.end()) != string::npos) { + return true; + } + } + + // Force encoding when there are only ASCII chars, but there is + // also at least one of '\n' or '\r' (header fields) + if (buffer.find_first_of("\n\r") != string::npos) { + return true; + } + + // If any RFC-2047 sequence is found in the buffer, encode it + if (buffer.find("=?") != string::npos || buffer.find("?=") != string::npos) { + return true; + } + + // If a language is specified, force encoding + if (!lang.empty()) { + return true; + } + + return false; +} + + +// static +wordEncoder::Encoding wordEncoder::guessBestEncoding( + const string& buffer, + const charset& charset +) { + + // Charset-specific encoding + encoding recEncoding; + + if (charset.getRecommendedEncoding(recEncoding)) { + + if (recEncoding == encoding(encodingTypes::QUOTED_PRINTABLE)) { + return ENCODING_QP; + } else { + return ENCODING_B64; + } + } + + // Use Base64 if more than 40% non-ASCII, or Quoted-Printable else (default) + const size_t asciiCount = + utility::stringUtils::countASCIIchars(buffer.begin(), buffer.end()); + + const size_t asciiPercent = + buffer.length() == 0 ? 100 : (100 * asciiCount) / buffer.length(); + + if (asciiPercent < 60) { + return ENCODING_B64; + } else { + return ENCODING_QP; + } +} + + +} // vmime |