// // VMime library (http://www.vmime.org) // Copyright (C) 2002 Vincent Richard // // This program is free software; you can redistribute it and/or // modify it under the terms of the GNU General Public License as // published by the Free Software Foundation; either version 3 of // the License, or (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // General Public License for more details. // // You should have received a copy of the GNU General Public License along // with this program; if not, write to the Free Software Foundation, Inc., // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. // // Linking this library statically or dynamically with other modules is making // a combined work based on this library. Thus, the terms and conditions of // the GNU General Public License cover the whole combination. // #include "vmime/bodyPart.hpp" #include "vmime/body.hpp" #include "vmime/contentTypeField.hpp" #include "vmime/text.hpp" #include "vmime/utility/random.hpp" #include "vmime/utility/seekableInputStreamRegionAdapter.hpp" #include "vmime/utility/outputStreamAdapter.hpp" #include "vmime/parserHelpers.hpp" #include "vmime/emptyContentHandler.hpp" #include "vmime/stringContentHandler.hpp" #include "vmime/streamContentHandler.hpp" namespace vmime { body::body() : m_contents(make_shared ()) { } body::~body() { } /* * boundaryStart: will become the index for "\r\n--marker" * boundaryEnd: will become the index after "marker", i.e. index for potential trailing "--", "\r\n", etc. * return value: index for "marker" */ // static size_t body::findNextBoundaryPosition( const shared_ptr & parser, const string& boundary, const size_t position, const size_t end, size_t* boundaryStart, size_t* boundaryEnd ) { size_t pos = position; for (; pos != npos && pos < end; ++pos) { pos = parser->findNext(boundary, pos); if (pos == npos) { break; // not found } if (pos == 0) { // Boundary is a prefix of another, continue the search (same for the other "continue"s) continue; } // Ensure the bytes before boundary are "[LF]--": boundary should be // at the beginning of a line, and should start with "--" if (pos < 3) { continue; } parser->seek(pos - 3); if (!parser->matchBytes("\n--", 3)) { continue; } parser->seek(pos + boundary.length()); const byte_t next = parser->peekByte(); // Boundary should be followed by a new line or a dash if (!isspace(next) && next != '-') { continue; } // Get rid of the "[CR]" just before "[LF]--", if any size_t backwards = 0; if (pos >= 4) { parser->seek(pos - 4); if (parser->peekByte() == '\r') { ++backwards; } } *boundaryStart = pos - backwards - 3; *boundaryEnd = pos + boundary.length(); return pos; } return pos; } void body::parseImpl( const parsingContext& ctx, const shared_ptr & parser, const size_t position, const size_t end, size_t* newPosition ) { removeAllParts(); m_prologText.clear(); m_epilogText.clear(); if (end == position) { setParsedBounds(position, end); if (newPosition) { *newPosition = end; } return; } // Check whether the body is a MIME-multipart. // If it is, also get (or try to guess) the boundary separator. bool isMultipart = false; string boundary; shared_ptr ctf = m_part->getHeader()->findField (fields::CONTENT_TYPE); if (ctf) { const mediaType type = *ctf->getValue (); if (type.getType() == mediaTypes::MULTIPART) { isMultipart = true; if (ctf->hasBoundary()) { boundary = ctf->getBoundary(); } else { // No "boundary" parameter specified: we can try to // guess it by scanning the body contents... size_t pos = position; parser->seek(pos); if (pos + 2 < end && parser->matchBytes("--", 2)) { pos += 2; } else { pos = parser->findNext("\n--", position); if ((pos != npos) && (pos + 3 < end)) { pos += 3; // skip \n-- } } if ((pos != npos) && (pos < end)) { parser->seek(pos); // Read some bytes after boundary separator byte_t buffer[256]; const size_t bufferLen = parser->read(buffer, std::min(end - pos, sizeof(buffer) / sizeof(buffer[0]))); buffer[sizeof(buffer) / sizeof(buffer[0]) - 1] = '\0'; // Skip transport padding bytes (SPACE or HTAB), if any size_t boundarySkip = 0; while (boundarySkip < bufferLen && parserHelpers::isSpace(buffer[boundarySkip])) { ++boundarySkip; } // Extract boundary from buffer (stop at first CR or LF). // We have to stop after a reasonnably long boundary length (100) // not to take the whole body contents for a boundary... byte_t boundaryBytes[100]; size_t boundaryLen = 0; for (byte_t c = buffer[boundarySkip] ; boundaryLen < bufferLen && boundaryLen < 100 && !(c == '\r' || c == '\n') ; ++boundaryLen, c = buffer[boundarySkip + boundaryLen]) { boundaryBytes[boundaryLen] = c; } if (boundaryLen >= 1 && boundaryLen < 100) { // RFC #1521, Page 31: // "...the boundary parameter, which consists of 1 to 70 // characters from a set of characters known to be very // robust through email gateways, and NOT ending with // white space..." while (boundaryLen != 0 && parserHelpers::isSpace(boundaryBytes[boundaryLen - 1])) { boundaryLen--; } if (boundaryLen >= 1) { boundary = string(boundaryBytes, boundaryBytes + boundaryLen); } } } } } } // This is a multi-part body if (isMultipart && !boundary.empty()) { size_t partStart = position; size_t pos = position; bool lastPart = false; // Find the first boundary size_t boundaryStart, boundaryEnd; pos = findNextBoundaryPosition(parser, boundary, pos, end, &boundaryStart, &boundaryEnd); for (int index = 0 ; !lastPart && (pos != npos) && (pos < end) ; ++index) { size_t partEnd = boundaryStart; // Check whether it is the last part (boundary terminated by "--") parser->seek(boundaryEnd); if (boundaryEnd + 1 < end && parser->matchBytes("--", 2)) { lastPart = true; boundaryEnd += 2; } // RFC 2046 §5.1.1 page 22: """If a boundary delimiter // line appears to end with white space, the white // space must be presumed to have been added by a // gateway, and must be deleted.""" parser->seek(boundaryEnd); boundaryEnd += parser->skipIf(parserHelpers::isSpaceOrTab, end); // End of boundary line if (boundaryEnd + 1 < end && parser->matchBytes("\r\n", 2)) { boundaryEnd += 2; } else if (boundaryEnd < end && parser->peekByte() == '\n') { ++boundaryEnd; } else if (boundaryEnd == end) { } else { /* * RFC 2046 §5.1.1 page 19: """[...] optional * linear whitespace, and a terminating * CRLF.""" — junk handling is left * unspecified, so we might as well skip it to * facilitate broken mails. */ boundaryEnd += parser->skipIf([](char_t c) { return c != '\n'; }, end); pos = findNextBoundaryPosition(parser, boundary, boundaryEnd, end, &boundaryStart, &boundaryEnd); --index; continue; } if (index == 0) { if (partEnd > partStart) { vmime::text text; text.parse(ctx, parser, partStart, partEnd); m_prologText = text.getWholeBuffer(); } else { m_prologText = ""; } } else { // index > 0 shared_ptr part = m_part->createChildPart(); // End before start may happen on empty bodyparts (directly // successive boundaries without even a line-break) if (partEnd < partStart) { std::swap(partStart, partEnd); } part->parse(ctx, parser, partStart, partEnd, NULL); m_parts.push_back(part); } partStart = boundaryEnd; // Find the next boundary pos = findNextBoundaryPosition( parser, boundary, boundaryEnd, end, &boundaryStart, &boundaryEnd ); } m_contents = make_shared (); // Last part was not found: recover from missing boundary if (!lastPart && pos == npos) { shared_ptr part = m_part->createChildPart(); try { part->parse(ctx, parser, partStart, end); } catch (std::exception&) { throw; } m_parts.push_back(part); // Treat remaining text as epilog } else if (partStart < end) { vmime::text text; text.parse(ctx, parser, partStart, end); m_epilogText = text.getWholeBuffer(); } // Treat the contents as 'simple' data } else { encoding enc; shared_ptr cef = m_part->getHeader()->findField(fields::CONTENT_TRANSFER_ENCODING); if (cef) { enc = *cef->getValue (); } else { // Defaults to "7bit" (RFC-1521) enc = vmime::encoding(encodingTypes::SEVEN_BIT); } // Extract the (encoded) contents const size_t length = end - position; shared_ptr contentStream = make_shared ( parser->getUnderlyingStream(), position, length ); m_contents = make_shared (contentStream, length, enc); } setParsedBounds(position, end); if (newPosition) { *newPosition = end; } } text body::getActualPrologText(const generationContext& ctx) const { const string& prologText = m_prologText.empty() ? (isRootPart() ? ctx.getPrologText() : NULL_STRING ) : m_prologText; if (prologText.empty()) { return text(); } else { return text(prologText, vmime::charset("us-ascii")); } } text body::getActualEpilogText(const generationContext& ctx) const { const string& epilogText = m_epilogText.empty() ? (isRootPart() ? ctx.getEpilogText() : NULL_STRING ) : m_epilogText; if (epilogText.empty()) { return text(); } else { return text(epilogText, vmime::charset("us-ascii")); } } void body::generateImpl( const generationContext& ctx, utility::outputStream& os, const size_t /* curLinePos */, size_t* newLinePos ) const { // MIME-Multipart if (getPartCount() != 0) { string boundary; if (!m_part) { boundary = generateRandomBoundaryString(); } else { // Use current boundary string, if specified. If no "Content-Type" field is // present, or the boundary is not specified, generate a random one shared_ptr ctf = m_part->getHeader()->findField (fields::CONTENT_TYPE); if (ctf) { if (ctf->hasBoundary()) { boundary = ctf->getBoundary(); } else { // No boundary string specified boundary = generateRandomBoundaryString(); } } else { // No Content-Type (and no boundary string specified) boundary = generateRandomBoundaryString(); } } const text prologText = getActualPrologText(ctx); const text epilogText = getActualEpilogText(ctx); if (!prologText.isEmpty()) { prologText.encodeAndFold( ctx, os, 0, NULL, text::FORCE_NO_ENCODING | text::NO_NEW_LINE_SEQUENCE ); os << CRLF; } os << "--" << boundary; for (size_t p = 0 ; p < getPartCount() ; ++p) { os << CRLF; getPartAt(p)->generate(ctx, os, 0); os << CRLF << "--" << boundary; } os << "--" << CRLF; if (!epilogText.isEmpty()) { epilogText.encodeAndFold( ctx, os, 0, NULL, text::FORCE_NO_ENCODING | text::NO_NEW_LINE_SEQUENCE ); os << CRLF; } if (newLinePos) { *newLinePos = 0; } // Simple body } else { // Generate the contents shared_ptr contents = m_contents->clone(); contents->setContentTypeHint(getContentType()); contents->generate(os, getEncoding(), ctx.getMaxLineLength()); } } size_t body::getGeneratedSize(const generationContext& ctx) { // MIME-Multipart if (getPartCount() != 0) { size_t size = 0; // Size of parts and boundaries for (size_t p = 0 ; p < getPartCount() ; ++p) { size += 100; // boundary, CRLF... size += getPartAt(p)->getGeneratedSize(ctx); } // Size of prolog/epilog text const text prologText = getActualPrologText(ctx); if (!prologText.isEmpty()) { std::ostringstream oss; utility::outputStreamAdapter osa(oss); prologText.encodeAndFold( ctx, osa, 0, NULL, text::FORCE_NO_ENCODING | text::NO_NEW_LINE_SEQUENCE ); size += oss.str().size(); } const text epilogText = getActualEpilogText(ctx); if (!epilogText.isEmpty()) { std::ostringstream oss; utility::outputStreamAdapter osa(oss); epilogText.encodeAndFold( ctx, osa, 0, NULL, text::FORCE_NO_ENCODING | text::NO_NEW_LINE_SEQUENCE ); size += oss.str().size(); } return size; // Simple body } else { if (getEncoding() == m_contents->getEncoding()) { // No re-encoding has to be performed return m_contents->getLength(); } else { shared_ptr srcEncoder = m_contents->getEncoding().getEncoder(); shared_ptr dstEncoder = getEncoding().getEncoder(); return dstEncoder->getEncodedSize(srcEncoder->getDecodedSize(m_contents->getLength())); } } } /* RFC #1521, Page 32: 7.2.1. Multipart: The common syntax "...Encapsulation boundaries must not appear within the encapsulations, and must be no longer than 70 characters..." boundary := 0*69 bcharsnospace bchars := bcharsnospace / " " bcharsnospace := DIGIT / ALPHA / "'" / "(" / ")" / "+" /"_" / "," / "-" / "." / "/" / ":" / "=" / "?" */ const string body::generateRandomBoundaryString() { // 64 characters that can be _safely_ used in a boundary string static const char bchars[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-+"; /* RFC #1521, Page 19: Since the hyphen character ("-") is represented as itself in the Quoted-Printable encoding, care must be taken, when encapsulating a quoted-printable encoded body in a multipart entity, to ensure that the encapsulation boundary does not appear anywhere in the encoded body. (A good strategy is to choose a boundary that includes a character sequence such as "=_" which can never appear in a quoted- printable body. See the definition of multipart messages later in this document.) */ char boundary[2 + 48 + 1] = { 0 }; boundary[0] = '='; boundary[1] = '_'; // Generate a string of random characters unsigned int r = utility::random::getTime(); unsigned int m = static_cast (sizeof(unsigned int)); for (size_t i = 2 ; i < (sizeof(boundary) / sizeof(boundary[0]) - 1) ; ++i) { boundary[i] = bchars[r & 63]; r >>= 6; if (--m == 0) { r = utility::random::getNext(); m = static_cast (sizeof(unsigned int)); } } return string(boundary); } bool body::isValidBoundary(const string& boundary) { static const string validChars("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'()+_,-./:=?"); const string::const_iterator end = boundary.end(); bool valid = false; if (boundary.length() > 0 && boundary.length() < 70) { const char last = *(end - 1); if (!(last == ' ' || last == '\t' || last == '\n')) { valid = true; for (string::const_iterator i = boundary.begin() ; valid && i != end ; ++i) { valid = (validChars.find_first_of(*i) != string::npos); } } } return valid; } // // Quick-access functions // void body::setContentType(const mediaType& type, const charset& chset) { shared_ptr ctf = dynamicCast (m_part->getHeader()->ContentType()); ctf->setValue(type); ctf->setCharset(chset); } void body::setContentType(const mediaType& type) { m_part->getHeader()->ContentType()->setValue(type); } const mediaType body::getContentType() const { shared_ptr ctf = m_part->getHeader()->findField (fields::CONTENT_TYPE); if (ctf) { return *ctf->getValue (); } else { // Defaults to "text/plain" (RFC-1521) return mediaType(mediaTypes::TEXT, mediaTypes::TEXT_PLAIN); } } void body::setCharset(const charset& chset) { shared_ptr ctf = m_part->getHeader()->findField (fields::CONTENT_TYPE); // If a Content-Type field exists, set charset if (ctf) { ctf->setCharset(chset); // Else, create a new Content-Type field of default type "text/plain" // and set charset on it } else { setContentType(mediaType(mediaTypes::TEXT, mediaTypes::TEXT_PLAIN), chset); } } const charset body::getCharset() const { const shared_ptr ctf = m_part->getHeader()->findField (fields::CONTENT_TYPE); if (ctf) { if (ctf->hasCharset()) { return ctf->getCharset(); } else { // Defaults to "us-ascii" (RFC-1521) return vmime::charset(charsets::US_ASCII); } } else { // Defaults to "us-ascii" (RFC-1521) return vmime::charset(charsets::US_ASCII); } } void body::setEncoding(const encoding& enc) { m_part->getHeader()->ContentTransferEncoding()->setValue(enc); } const encoding body::getEncoding() const { shared_ptr cef = m_part->getHeader()->findField(fields::CONTENT_TRANSFER_ENCODING); if (cef) { return *cef->getValue (); } else { if (m_contents->isEncoded()) { return m_contents->getEncoding(); } } // Defaults to "7bit" (RFC-1521) return vmime::encoding(encodingTypes::SEVEN_BIT); } void body::setParentPart(bodyPart* parent) { m_part = parent; for (std::vector >::iterator it = m_parts.begin() ; it != m_parts.end() ; ++it) { shared_ptr childPart = *it; parent->importChildPart(childPart); } } bool body::isRootPart() const { return !m_part || !m_part->getParentPart(); } shared_ptr body::clone() const { shared_ptr bdy = make_shared (); bdy->copyFrom(*this); return bdy; } void body::copyFrom(const component& other) { const body& bdy = dynamic_cast (other); m_prologText = bdy.m_prologText; m_epilogText = bdy.m_epilogText; m_contents = bdy.m_contents; removeAllParts(); for (size_t p = 0 ; p < bdy.getPartCount() ; ++p) { shared_ptr part = m_part->createChildPart(); part->copyFrom(*bdy.getPartAt(p)); m_parts.push_back(part); } } body& body::operator=(const body& other) { copyFrom(other); return *this; } const string& body::getPrologText() const { return m_prologText; } void body::setPrologText(const string& prologText) { m_prologText = prologText; } const string& body::getEpilogText() const { return m_epilogText; } void body::setEpilogText(const string& epilogText) { m_epilogText = epilogText; } const shared_ptr body::getContents() const { return m_contents; } void body::setContents(const shared_ptr & contents) { m_contents = contents; } void body::setContents( const shared_ptr & contents, const mediaType& type ) { m_contents = contents; setContentType(type); } void body::setContents( const shared_ptr & contents, const mediaType& type, const charset& chset ) { m_contents = contents; setContentType(type, chset); } void body::setContents( const shared_ptr & contents, const mediaType& type, const charset& chset, const encoding& enc ) { m_contents = contents; setContentType(type, chset); setEncoding(enc); } void body::initNewPart(const shared_ptr & part) { // A part can be in only one body at the same time: if part is // already attached to a parent part, remove it from the current // parent part if (part->getParentPart()) { part->getParentPart()->getBody()->removePart(part); } if (m_part) { m_part->importChildPart(part); shared_ptr
hdr = m_part->getHeader(); // Check whether we have a boundary string shared_ptr ctf = hdr->findField (fields::CONTENT_TYPE); if (ctf) { if (ctf->hasBoundary()) { const string boundary = ctf->getBoundary(); if (boundary.empty() || !isValidBoundary(boundary)) { ctf->setBoundary(generateRandomBoundaryString()); } } else { // No "boundary" parameter: generate a random one. ctf->setBoundary(generateRandomBoundaryString()); } if (ctf->getValue ()->getType() != mediaTypes::MULTIPART) { // Warning: multi-part body but the Content-Type is // not specified as "multipart/..." } } else { // No "Content-Type" field: create a new one and generate // a random boundary string. ctf = hdr->getField (fields::CONTENT_TYPE); ctf->setValue(mediaType(mediaTypes::MULTIPART, mediaTypes::MULTIPART_MIXED)); ctf->setBoundary(generateRandomBoundaryString()); } } } void body::appendPart(const shared_ptr & part) { initNewPart(part); m_parts.push_back(part); } void body::insertPartBefore( const shared_ptr & beforePart, const shared_ptr & part ) { initNewPart(part); const std::vector >::iterator it = std::find( m_parts.begin(), m_parts.end(), beforePart ); if (it == m_parts.end()) { throw exceptions::no_such_part(); } m_parts.insert(it, part); } void body::insertPartBefore( const size_t pos, const shared_ptr & part ) { initNewPart(part); m_parts.insert(m_parts.begin() + pos, part); } void body::insertPartAfter( const shared_ptr & afterPart, const shared_ptr & part ) { initNewPart(part); const std::vector >::iterator it = std::find( m_parts.begin(), m_parts.end(), afterPart ); if (it == m_parts.end()) { throw exceptions::no_such_part(); } m_parts.insert(it + 1, part); } void body::insertPartAfter(const size_t pos, const shared_ptr & part) { initNewPart(part); m_parts.insert(m_parts.begin() + pos + 1, part); } void body::removePart(const shared_ptr & part) { const std::vector >::iterator it = std::find( m_parts.begin(), m_parts.end(), part ); if (it == m_parts.end()) { throw exceptions::no_such_part(); } m_parts.erase(it); } void body::removePart(const size_t pos) { m_parts.erase(m_parts.begin() + pos); } void body::removeAllParts() { m_parts.clear(); } size_t body::getPartCount() const { return m_parts.size(); } bool body::isEmpty() const { return m_parts.size() == 0; } shared_ptr body::getPartAt(const size_t pos) { return m_parts[pos]; } const shared_ptr body::getPartAt(const size_t pos) const { return m_parts[pos]; } const std::vector > body::getPartList() const { std::vector > list; list.reserve(m_parts.size()); for (std::vector >::const_iterator it = m_parts.begin() ; it != m_parts.end() ; ++it) { list.push_back(*it); } return list; } const std::vector > body::getPartList() { return m_parts; } const std::vector > body::getChildComponents() { std::vector > list; copy_vector(m_parts, list); return list; } } // vmime