diff options
Diffstat (limited to 'vmime-master/src/vmime/utility/encoder/qpEncoder.cpp')
-rw-r--r-- | vmime-master/src/vmime/utility/encoder/qpEncoder.cpp | 568 |
1 files changed, 568 insertions, 0 deletions
diff --git a/vmime-master/src/vmime/utility/encoder/qpEncoder.cpp b/vmime-master/src/vmime/utility/encoder/qpEncoder.cpp new file mode 100644 index 0000000..4aeb640 --- /dev/null +++ b/vmime-master/src/vmime/utility/encoder/qpEncoder.cpp @@ -0,0 +1,568 @@ +// +// VMime library (http://www.vmime.org) +// Copyright (C) 2002 Vincent Richard <vincent@vmime.org> +// +// This program is free software; you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 3 of +// the License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this program; if not, write to the Free Software Foundation, Inc., +// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Linking this library statically or dynamically with other modules is making +// a combined work based on this library. Thus, the terms and conditions of +// the GNU General Public License cover the whole combination. +// + +#include "vmime/utility/encoder/qpEncoder.hpp" +#include "vmime/parserHelpers.hpp" + + +namespace vmime { +namespace utility { +namespace encoder { + + +qpEncoder::qpEncoder() { + +} + + +const std::vector <string> qpEncoder::getAvailableProperties() const { + + std::vector <string> list(encoder::getAvailableProperties()); + + list.push_back("maxlinelength"); + + list.push_back("text"); // if set, '\r' and '\n' are not hex-encoded. + // WARNING! You should not use this for binary data! + + list.push_back("rfc2047"); // for header fields encoding (RFC #2047) + + return list; +} + + + +// Hex-encoding table +const unsigned char qpEncoder::sm_hexDigits[] = "0123456789ABCDEF"; + + +// RFC-2047 encoding table: we always encode RFC-2047 using the restricted +// charset, that is the one used for 'phrase' in From/To/Cc/... headers. +// +// " The set of characters that may be used in a "Q"-encoded 'encoded-word' +// is restricted to: <upper and lower case ASCII letters, decimal digits, +// "!", "*", "+", "-", "/", "=", and "_" (underscore, ASCII 95.)>. " +// +// Two special cases: +// - encode space (32) as underscore (95) +// - encode underscore as hex (=5F) +// +// This is a quick lookup table: +// '1' means "encode", '0' means "no encoding" +// +const vmime_uint8 qpEncoder::sm_RFC2047EncodeTable[] = { + /* 0 NUL */ 1, /* 1 SOH */ 1, /* 2 STX */ 1, /* 3 ETX */ 1, /* 4 EOT */ 1, /* 5 ENQ */ 1, + /* 6 ACK */ 1, /* 7 BEL */ 1, /* 8 BS */ 1, /* 9 TAB */ 1, /* 10 LF */ 1, /* 11 VT */ 1, + /* 12 FF */ 1, /* 13 CR */ 1, /* 14 SO */ 1, /* 15 SI */ 1, /* 16 DLE */ 1, /* 17 DC1 */ 1, + /* 18 DC2 */ 1, /* 19 DC3 */ 1, /* 20 DC4 */ 1, /* 21 NAK */ 1, /* 22 SYN */ 1, /* 23 ETB */ 1, + /* 24 CAN */ 1, /* 25 EM */ 1, /* 26 SUB */ 1, /* 27 ESC */ 1, /* 28 FS */ 1, /* 29 GS */ 1, + /* 30 RS */ 1, /* 31 US */ 1, /* 32 SPACE*/ 1, /* 33 ! */ 0, /* 34 " */ 1, /* 35 # */ 1, + /* 36 $ */ 1, /* 37 % */ 1, /* 38 & */ 1, /* 39 ' */ 1, /* 40 ( */ 1, /* 41 ) */ 1, + /* 42 * */ 0, /* 43 + */ 0, /* 44 , */ 1, /* 45 - */ 0, /* 46 . */ 1, /* 47 / */ 0, + /* 48 0 */ 0, /* 49 1 */ 0, /* 50 2 */ 0, /* 51 3 */ 0, /* 52 4 */ 0, /* 53 5 */ 0, + /* 54 6 */ 0, /* 55 7 */ 0, /* 56 8 */ 0, /* 57 9 */ 0, /* 58 : */ 1, /* 59 ; */ 1, + /* 60 < */ 1, /* 61 = */ 1, /* 62 > */ 1, /* 63 ? */ 1, /* 64 @ */ 1, /* 65 A */ 0, + /* 66 B */ 0, /* 67 C */ 0, /* 68 D */ 0, /* 69 E */ 0, /* 70 F */ 0, /* 71 G */ 0, + /* 72 H */ 0, /* 73 I */ 0, /* 74 J */ 0, /* 75 K */ 0, /* 76 L */ 0, /* 77 M */ 0, + /* 78 N */ 0, /* 79 O */ 0, /* 80 P */ 0, /* 81 Q */ 0, /* 82 R */ 0, /* 83 S */ 0, + /* 84 T */ 0, /* 85 U */ 0, /* 86 V */ 0, /* 87 W */ 0, /* 88 X */ 0, /* 89 Y */ 0, + /* 90 Z */ 0, /* 91 [ */ 1, /* 92 " */ 1, /* 93 ] */ 1, /* 94 ^ */ 1, /* 95 _ */ 1, + /* 96 ` */ 1, /* 97 a */ 0, /* 98 b */ 0, /* 99 c */ 0, /* 100 d */ 0, /* 101 e */ 0, + /* 102 f */ 0, /* 103 g */ 0, /* 104 h */ 0, /* 105 i */ 0, /* 106 j */ 0, /* 107 k */ 0, + /* 108 l */ 0, /* 109 m */ 0, /* 110 n */ 0, /* 111 o */ 0, /* 112 p */ 0, /* 113 q */ 0, + /* 114 r */ 0, /* 115 s */ 0, /* 116 t */ 0, /* 117 u */ 0, /* 118 v */ 0, /* 119 w */ 0, + /* 120 x */ 0, /* 121 y */ 0, /* 122 z */ 0, /* 123 { */ 1, /* 124 | */ 1, /* 125 } */ 1, + /* 126 ~ */ 1, /* 127 DEL */ 1 +}; + + +// Hex-decoding table +const vmime_uint8 qpEncoder::sm_hexDecodeTable[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, + 0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + + +// static +bool qpEncoder::RFC2047_isEncodingNeededForChar(const byte_t c) { + + return c >= 128 || sm_RFC2047EncodeTable[c] != 0; +} + + +// static +int qpEncoder::RFC2047_getEncodedLength(const byte_t c) { + + if (c >= 128 || sm_RFC2047EncodeTable[c] != 0) { + + if (c == 32) { // space + + // Encoded as "_" + return 1; + + } else { + + // Hex encoding + return 3; + } + + } else { + + return 1; // no encoding + } +} + + +#ifndef VMIME_BUILDING_DOC + +#define QP_ENCODE_HEX(x) \ + outBuffer[outBufferPos] = '='; \ + outBuffer[outBufferPos + 1] = sm_hexDigits[x >> 4]; \ + outBuffer[outBufferPos + 2] = sm_hexDigits[x & 0xF]; \ + outBufferPos += 3; \ + curCol += 3 + +#define QP_WRITE(s, x, l) s.write(reinterpret_cast <byte_t*>(x), l) + +#endif // VMIME_BUILDING_DOC + + +size_t qpEncoder::encode( + utility::inputStream& in, + utility::outputStream& out, + utility::progressListener* progress +) { + + in.reset(); // may not work... + + const size_t propMaxLineLength = + getProperties().getProperty <size_t>("maxlinelength", static_cast <size_t>(-1)); + + const bool rfc2047 = getProperties().getProperty <bool>("rfc2047", false); + const bool text = getProperties().getProperty <bool>("text", false); // binary mode by default + + const bool cutLines = (propMaxLineLength != static_cast <size_t>(-1)); + const size_t maxLineLength = std::min(propMaxLineLength, static_cast <size_t>(74)); + + // Process the data + byte_t buffer[16384]; + size_t bufferLength = 0; + size_t bufferPos = 0; + + size_t curCol = 0; + + byte_t outBuffer[16384]; + size_t outBufferPos = 0; + + size_t total = 0; + size_t inTotal = 0; + + if (progress) { + progress->start(0); + } + + while (bufferPos < bufferLength || !in.eof()) { + + // Flush current output buffer + if (outBufferPos + 6 >= static_cast <int>(sizeof(outBuffer))) { + + QP_WRITE(out, outBuffer, outBufferPos); + + total += outBufferPos; + outBufferPos = 0; + } + + // Need to get more data? + if (bufferPos >= bufferLength) { + + bufferLength = in.read(buffer, sizeof(buffer)); + bufferPos = 0; + + // No more data + if (bufferLength == 0) { + break; + } + } + + // Get the next char and encode it + const byte_t c = buffer[bufferPos++]; + + if (rfc2047) { + + if (c >= 128 || sm_RFC2047EncodeTable[c] != 0) { + + if (c == 32) { // space + + // RFC-2047, Page 5, 4.2. The "Q" encoding: + // << The 8-bit hexadecimal value 20 (e.g., ISO-8859-1 SPACE) may be + // represented as "_" (underscore, ASCII 95.). >> + outBuffer[outBufferPos++] = '_'; + ++curCol; + + } else { + + // Other characters: '=' + hexadecimal encoding + QP_ENCODE_HEX(c); + } + + } else { + + // No encoding + outBuffer[outBufferPos++] = c; + ++curCol; + } + + } else { + + switch (c) { + + case 46: { // . + + if (curCol == 0) { + // If a '.' appears at the beginning of a line, we encode it to + // to avoid problems with SMTP servers... ("\r\n.\r\n" means the + // end of data transmission). + QP_ENCODE_HEX('.'); + continue; + } + + outBuffer[outBufferPos++] = '.'; + ++curCol; + break; + } + case 32: { // space + + // Need to get more data? + if (bufferPos >= bufferLength) { + bufferLength = in.read(buffer, sizeof(buffer)); + bufferPos = 0; + } + + // Spaces cannot appear at the end of a line. So, encode the space. + if (bufferPos >= bufferLength || + (buffer[bufferPos] == '\r' || buffer[bufferPos] == '\n')) { + + QP_ENCODE_HEX(' '); + + } else { + + outBuffer[outBufferPos++] = ' '; + ++curCol; + } + + break; + } + case 9: { // TAB + + QP_ENCODE_HEX(c); + break; + } + case 13: // CR + case 10: { // LF + + // RFC-2045/6.7(4) + + // Text data + if (text && !rfc2047) { + + outBuffer[outBufferPos++] = c; + ++curCol; + + if (c == 10) { + curCol = 0; // reset current line length + } + + // Binary data + } else { + + QP_ENCODE_HEX(c); + } + + break; + } + case 61: { // = + + QP_ENCODE_HEX('='); + break; + } + /* + Rule #2: (Literal representation) Octets with decimal values of 33 + through 60 inclusive, and 62 through 126, inclusive, MAY be + represented as the ASCII characters which correspond to those + octets (EXCLAMATION POINT through LESS THAN, and GREATER THAN + through TILDE, respectively). + */ + default: + + //if ((c >= 33 && c <= 60) || (c >= 62 && c <= 126)) + if (c >= 33 && c <= 126 && c != 61 && c != 63) { + + outBuffer[outBufferPos++] = c; + ++curCol; + + // Other characters: '=' + hexadecimal encoding + } else { + + QP_ENCODE_HEX(c); + } + + break; + + } // switch (c) + + // Soft line break : "=\r\n" + if (cutLines && curCol >= maxLineLength - 1) { + + outBuffer[outBufferPos] = '='; + outBuffer[outBufferPos + 1] = '\r'; + outBuffer[outBufferPos + 2] = '\n'; + + outBufferPos += 3; + curCol = 0; + } + + } // !rfc2047 + + ++inTotal; + + if (progress) { + progress->progress(inTotal, inTotal); + } + } + + // Flush remaining output buffer + if (outBufferPos != 0) { + QP_WRITE(out, outBuffer, outBufferPos); + total += outBufferPos; + } + + if (progress) { + progress->stop(inTotal); + } + + return total; +} + + +size_t qpEncoder::decode( + utility::inputStream& in, + utility::outputStream& out, + utility::progressListener* progress +) { + + in.reset(); // may not work... + + // Process the data + const bool rfc2047 = getProperties().getProperty <bool>("rfc2047", false); + + byte_t buffer[16384]; + size_t bufferLength = 0; + size_t bufferPos = 0; + + byte_t outBuffer[16384]; + size_t outBufferPos = 0; + + size_t total = 0; + size_t inTotal = 0; + + while (bufferPos < bufferLength || !in.eof()) { + + // Flush current output buffer + if (outBufferPos >= sizeof(outBuffer)) { + + QP_WRITE(out, outBuffer, outBufferPos); + + total += outBufferPos; + outBufferPos = 0; + } + + // Need to get more data? + if (bufferPos >= bufferLength) { + + bufferLength = in.read(buffer, sizeof(buffer)); + bufferPos = 0; + + // No more data + if (bufferLength == 0) { + break; + } + } + + // Decode the next sequence (hex-encoded byte or printable character) + byte_t c = buffer[bufferPos++]; + + ++inTotal; + + switch (c) { + + case '=': { + + if (bufferPos >= bufferLength) { + bufferLength = in.read(buffer, sizeof(buffer)); + bufferPos = 0; + } + + if (bufferPos < bufferLength) { + + c = buffer[bufferPos++]; + + ++inTotal; + + switch (c) { + + // Ignore soft line break ("=\r\n" or "=\n") + case '\r': + + // Read one byte more + if (bufferPos >= bufferLength) { + bufferLength = in.read(buffer, sizeof(buffer)); + bufferPos = 0; + } + + if (bufferPos < bufferLength) { + ++bufferPos; + ++inTotal; + } + + break; + + case '\n': + + break; + + // Hex-encoded char + default: + { + // We need another byte... + if (bufferPos >= bufferLength) { + bufferLength = in.read(buffer, sizeof(buffer)); + bufferPos = 0; + } + + if (bufferPos < bufferLength) { + + const byte_t next = buffer[bufferPos++]; + + ++inTotal; + + const byte_t value = static_cast <byte_t>( + sm_hexDecodeTable[c] * 16 + sm_hexDecodeTable[next] + ); + + outBuffer[outBufferPos++] = value; + + } else { + + // Premature end-of-data + } + + break; + } + + } + + } else { + + // Premature end-of-data + } + + break; + } + case '_': { + + if (rfc2047) { + + // RFC-2047, Page 5, 4.2. The "Q" encoding: + // << Note that the "_" always represents hexadecimal 20, even if the SPACE + // character occupies a different code position in the character set in use. >> + outBuffer[outBufferPos++] = 0x20; + break; + } + + outBuffer[outBufferPos++] = c; + break; + } + default: { + + outBuffer[outBufferPos++] = c; + break; + } + + } + + if (progress) { + progress->progress(inTotal, inTotal); + } + } + + // Flush remaining output buffer + if (outBufferPos != 0) { + QP_WRITE(out, outBuffer, outBufferPos); + total += outBufferPos; + } + + if (progress) { + progress->stop(inTotal); + } + + return total; +} + + +size_t qpEncoder::getEncodedSize(const size_t n) const { + + const size_t propMaxLineLength = + getProperties().getProperty <size_t>("maxlinelength", static_cast <size_t>(-1)); + + const bool cutLines = (propMaxLineLength != static_cast <size_t>(-1)); + const size_t maxLineLength = std::min(propMaxLineLength, static_cast <size_t>(74)); + + // Worst cast: 1 byte of input provide 3 bytes of output + // Count CRLF (2 bytes) for each line. + return n * 3 + (cutLines ? (n / maxLineLength) * 2 : 0); +} + + +size_t qpEncoder::getDecodedSize(const size_t n) const { + + // Worst case: 1 byte of input equals 1 byte of output + return n; +} + + +} // encoder +} // utility +} // vmime |