//
// VMime library (http://www.vmime.org)
// Copyright (C) 2002 Vincent Richard
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 3 of
// the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with this program; if not, write to the Free Software Foundation, Inc.,
// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Linking this library statically or dynamically with other modules is making
// a combined work based on this library. Thus, the terms and conditions of
// the GNU General Public License cover the whole combination.
//

#include "vmime/htmlTextPart.hpp"
#include "vmime/exception.hpp"

#include "vmime/contentTypeField.hpp"
#include "vmime/contentDisposition.hpp"
#include "vmime/text.hpp"

#include "vmime/emptyContentHandler.hpp"
#include "vmime/stringContentHandler.hpp"

#include "vmime/utility/outputStreamAdapter.hpp"


// NOTE(review): in this listing the template argument lists are absent
// (e.g. "make_shared ()", "shared_ptr part", "std::vector >::const_iterator").
// The pointee/element types are therefore not stated below -- confirm them
// against the class declaration before relying on any type-related comment.

namespace vmime {


// Default constructor.
// Both content members (m_plainText, the alternative plain text, and m_text,
// the HTML text) are initialised with freshly created objects.
// NOTE(review): the created type is not visible here; since
// emptyContentHandler.hpp is included, presumably an empty content handler
// -- TODO confirm.
htmlTextPart::htmlTextPart()
    : m_plainText(make_shared ()),
      m_text(make_shared ()) {

}


// Destructor: nothing to release explicitly in this translation unit.
htmlTextPart::~htmlTextPart() {

}


// Returns the media type built from mediaTypes::TEXT / mediaTypes::TEXT_HTML.
const mediaType htmlTextPart::getType() const {

    return mediaType(mediaTypes::TEXT, mediaTypes::TEXT_HTML);
}


// Returns the number of alternative parts this text part contributes:
// 1 when the plain text content reports isEmpty(), 2 otherwise.
size_t htmlTextPart::getPartCount() const {

    return m_plainText->isEmpty() ? 1 : 2;
}


// Generates the body parts for this text part and appends them to the body
// of 'parent'.
//
// Parameters:
//   (first, unnamed) -- the message; not used by this function (its name is
//                       commented out in the signature).
//   parent           -- part whose body receives the generated sub-parts.
//
// What gets appended, in this order:
//   1. a TEXT/TEXT_PLAIN part, only when m_plainText is not empty;
//   2. either
//      - the HTML part directly (no embedded objects), or
//      - a MULTIPART/MULTIPART_RELATED part that contains the HTML part
//        followed by one part per entry of m_objects.
void htmlTextPart::generateIn(
    const shared_ptr & /* message */,
    const shared_ptr & parent
) const {

    // Plain text
    if (!m_plainText->isEmpty()) {

        // -- Create a new part
        shared_ptr part = make_shared ();
        parent->getBody()->appendPart(part);

        // -- Set contents
        //    (the transfer encoding is chosen by encoding::decide() from the
        //    contents, the charset and the USAGE_TEXT hint)
        part->getBody()->setContents(
            m_plainText,
            mediaType(mediaTypes::TEXT, mediaTypes::TEXT_PLAIN),
            m_charset,
            encoding::decide(m_plainText, m_charset, encoding::USAGE_TEXT)
        );
    }

    // HTML text
    // -- Create a new part
    //    (created now, but only attached to a container further below, once
    //    we know whether it goes into 'parent' or into a "related" part)
    shared_ptr htmlPart = make_shared ();

    // -- Set contents
    htmlPart->getBody()->setContents(
        m_text,
        mediaType(mediaTypes::TEXT, mediaTypes::TEXT_HTML),
        m_charset,
        encoding::decide(m_text, m_charset, encoding::USAGE_TEXT)
    );

    // Handle the case we have embedded objects
    if (!m_objects.empty()) {

        // Create a "multipart/related" body part
        shared_ptr relPart = make_shared ();
        parent->getBody()->appendPart(relPart);

        relPart->getHeader()->ContentType()->setValue(
            mediaType(mediaTypes::MULTIPART, mediaTypes::MULTIPART_RELATED)
        );

        // Add the HTML part into this part
        relPart->getBody()->appendPart(htmlPart);

        // Also add objects into this part
        for (std::vector >::const_iterator it = m_objects.begin() ;
             it != m_objects.end() ; ++it) {

            shared_ptr objPart = make_shared ();
            relPart->getBody()->appendPart(objPart);

            string id = (*it)->getId();

            // Strip a leading "cid:" prefix (any letter case) so that only
            // the bare identifier ends up between '<' and '>' below.
            // This is the same test as embeddedObject::cleanId() in this file.
            if (id.length() >= 4 &&
                (id[0] == 'c' || id[0] == 'C') &&
                (id[1] == 'i' || id[1] == 'I') &&
                (id[2] == 'd' || id[2] == 'D') &&
                id[3] == ':') {

                id = id.substr(4);
            }

            // Header fields of the object part: type and encoding are taken
            // from the embedded object, disposition is always INLINE.
            objPart->getHeader()->ContentType()->setValue((*it)->getType());
            objPart->getHeader()->ContentId()->setValue(messageId("<" + id + ">"));
            objPart->getHeader()->ContentDisposition()->setValue(contentDisposition(contentDispositionTypes::INLINE));
            objPart->getHeader()->ContentTransferEncoding()->setValue((*it)->getEncoding());
                //encoding(encodingTypes::BASE64);

            // The part receives a clone of the object's data, not the
            // object's own data pointer.
            objPart->getBody()->setContents((*it)->getData()->clone());
        }

    } else {

        // Add the HTML part into the parent part
        parent->getBody()->appendPart(htmlPart);
    }
}


// NOTE(review): in this listing the template argument lists are absent
// (e.g. "std::vector >&", "shared_ptr p", "getValue ()", "make_shared (").
// Pointee/element/value types are therefore not stated in the code below
// -- confirm them against the class declaration.


// Recursively collects, from all descendants of 'part', the parts that could
// be embedded objects.
//
// Parameters:
//   part     -- root of the sub-tree to scan; 'part' itself is not tested,
//               only the parts found in its body (and theirs, recursively).
//   cidParts -- output; receives every descendant having a Content-Id field.
//   locParts -- output; receives every descendant having a Content-Location
//               field.
//
// A part carrying both fields is appended to both vectors.
void htmlTextPart::findEmbeddedParts(
    const bodyPart& part,
    std::vector >& cidParts,
    std::vector >& locParts
) {

    for (size_t i = 0 ; i < part.getBody()->getPartCount() ; ++i) {

        shared_ptr p = part.getBody()->getPartAt(i);

        // For a part to be an embedded object, it must have either a
        // Content-Id field or a Content-Location field.
        if (p->getHeader()->hasField(fields::CONTENT_ID)) {
            cidParts.push_back(p);
        }

        if (p->getHeader()->hasField(fields::CONTENT_LOCATION)) {
            locParts.push_back(p);
        }

        // Descend into the sub-parts of this part.
        findEmbeddedParts(*p, cidParts, locParts);
    }
}


// Appends a new embedded object, built from 'part', to m_objects.
//
// Parameters:
//   part    -- body part providing the contents, the encoding and (when
//              present) the Content-Type of the object.
//   id      -- identifier stored in the object (a content id or a location,
//              depending on refType).
//   refType -- how the HTML text references the object.
//
// The object is created from a clone of the part's contents.
// No check for an already-registered object is made here: each call adds one
// entry.
void htmlTextPart::addEmbeddedObject(
    const bodyPart& part,
    const string& id,
    const embeddedObject::ReferenceType refType
) {

    // The object may already exist. This can happen if an object is
    // identified by both a Content-Id and a Content-Location. In this
    // case, there will be two embedded objects with two different IDs
    // but referencing the same content.

    mediaType type;

    shared_ptr ctf = part.getHeader()->findField(fields::CONTENT_TYPE);

    if (ctf) {

        type = *ctf->getValue ();

    } else {

        // No "Content-type" field: assume "application/octet-stream".
        // NOTE(review): 'type' keeps its default-constructed value here; that
        // this equals "application/octet-stream" is not visible in this file
        // -- TODO confirm against mediaType's default constructor.
    }

    m_objects.push_back(
        make_shared (
            vmime::clone(part.getBody()->getContents()),
            part.getBody()->getEncoding(),
            id,
            type,
            refType
        )
    );
}


// Fills this object from an existing message.
//
// Parameters:
//   message  -- the whole message; scanned for candidate embedded objects and
//               for the alternative plain text part.
//   parent   -- the part passed on to findPlainTextPart() as 'parent'.
//   textPart -- the part holding the HTML text.
//
// Effects on members:
//   m_text      <- clone of textPart's body contents
//   m_charset   <- charset of textPart's Content-Type field, or a
//                  default-constructed charset when the field is missing or
//                  has no charset
//   m_objects   <- one entry appended per candidate part whose id/location
//                  occurs in the extracted HTML text (existing entries are
//                  not removed by this function)
//   m_plainText <- set by findPlainTextPart() when a plain text alternative
//                  is found, otherwise replaced by a freshly created object
void htmlTextPart::parse(
    const shared_ptr & message,
    const shared_ptr & parent,
    const shared_ptr & textPart
) {

    // Search for possible embedded objects in the _whole_ message.
    std::vector > cidParts;
    std::vector > locParts;

    findEmbeddedParts(*message, cidParts, locParts);

    // Extract HTML text
    // (written into a string so it can be searched for references below)
    std::ostringstream oss;
    utility::outputStreamAdapter adapter(oss);

    textPart->getBody()->getContents()->extract(adapter);

    const string data = oss.str();

    m_text = textPart->getBody()->getContents()->clone();

    // Find charset
    shared_ptr ctf =
        textPart->getHeader()->findField (fields::CONTENT_TYPE);

    if (ctf && ctf->hasCharset()) {
        m_charset = ctf->getCharset();
    } else {
        m_charset = charset();
    }

    // Extract embedded objects. The algorithm is quite simple: for each previously
    // found inline part, we check if its CID/Location is contained in the HTML text.
    for (std::vector >::const_iterator p = cidParts.begin() ; p != cidParts.end() ; ++p) {

        // The field is expected to exist: findEmbeddedParts() only put parts
        // with a Content-Id field into cidParts.
        const shared_ptr midField = (*p)->getHeader()->findField(fields::CONTENT_ID);
        const messageId mid = *midField->getValue ();

        // NOTE(review): only the spellings "CID:" and "cid:" are searched;
        // a mixed-case prefix such as "Cid:" would not match here, although
        // embeddedObject::cleanId() accepts any letter case.
        // NOTE(review): if mid.getId() is empty, any "cid:"/"CID:" occurrence
        // in the text makes this part count as referenced -- confirm intended.
        if (data.find("CID:" + mid.getId()) != string::npos ||
            data.find("cid:" + mid.getId()) != string::npos) {

            // This part is referenced in the HTML text.
            // Add it to the embedded object list.
            addEmbeddedObject(**p, mid.getId(), embeddedObject::REFERENCED_BY_ID);
        }
    }

    for (std::vector >::const_iterator p = locParts.begin() ; p != locParts.end() ; ++p) {

        // Same reasoning as above: parts in locParts have this field.
        const shared_ptr locField = (*p)->getHeader()->findField(fields::CONTENT_LOCATION);
        const text loc = *locField->getValue ();
        const string locStr = loc.getWholeBuffer();

        // NOTE(review): std::string::find() with an empty needle always
        // succeeds, so a part with an empty location value is treated as
        // referenced -- confirm intended.
        if (data.find(locStr) != string::npos) {

            // This part is referenced in the HTML text.
            // Add it to the embedded object list.
            addEmbeddedObject(**p, locStr, embeddedObject::REFERENCED_BY_LOCATION);
        }
    }

    // Extract plain text, if any.
    if (!findPlainTextPart(*message, *parent, *textPart)) {
        m_plainText = make_shared ();
    }
}


// Searches the tree rooted at 'part' for the plain text alternative of the
// HTML text and, when found, stores a clone of its contents in m_plainText.
//
// Parameters:
//   part     -- root of the sub-tree currently examined.
//   parent   -- compared by address against the children of a
//               "multipart/alternative" part (case where the HTML text sits
//               inside a "multipart/related" part).
//   textPart -- compared by address in the same way (case where the HTML
//               part is a direct child of the "multipart/alternative" part).
//
// Returns true when a TEXT/TEXT_PLAIN sibling was found and m_plainText was
// set; false otherwise.
//
// When 'part' is a "multipart/alternative" that directly contains 'parent' or
// 'textPart', the result is decided there and no deeper search is done.
// In every other case the children of 'part' are searched recursively until
// one call returns true.
bool htmlTextPart::findPlainTextPart(
    const bodyPart& part,
    const bodyPart& parent,
    const bodyPart& textPart
) {

    // We search for the nearest "multipart/alternative" part.
    const shared_ptr ctf = part.getHeader()->findField(fields::CONTENT_TYPE);

    if (ctf) {

        const mediaType type = *ctf->getValue ();

        if (type.getType() == mediaTypes::MULTIPART &&
            type.getSubType() == mediaTypes::MULTIPART_ALTERNATIVE) {

            // Is our HTML part (or its container) one of the alternatives?
            // The loop has no early exit; if several children match, the last
            // one is kept (only its non-null-ness is used afterwards).
            shared_ptr foundPart;

            for (size_t i = 0 ; i < part.getBody()->getPartCount() ; ++i) {

                const shared_ptr p = part.getBody()->getPartAt(i);

                if (p.get() == &parent ||     // if "text/html" is in "multipart/related"
                    p.get() == &textPart) {   // if not...

                    foundPart = p;
                }
            }

            if (foundPart) {

                bool found = false;

                // Now, search for the alternative plain text part
                for (size_t i = 0 ; !found && i < part.getBody()->getPartCount() ; ++i) {

                    const shared_ptr p = part.getBody()->getPartAt(i);

                    // Note: 'ctf' and 'type' below shadow the outer variables
                    // of the same names; they describe the child 'p'.
                    const shared_ptr ctf = p->getHeader()->findField(fields::CONTENT_TYPE);

                    if (ctf) {

                        const mediaType type = *ctf->getValue ();

                        if (type.getType() == mediaTypes::TEXT &&
                            type.getSubType() == mediaTypes::TEXT_PLAIN) {

                            m_plainText = p->getBody()->getContents()->clone();
                            found = true;
                        }

                    } else {

                        // No "Content-type" field.
                    }
                }

                // If we have not found the plain text part here, it means that
                // it does not exist (the MUA which built this message probably
                // did not include it...).
                return found;
            }
        }

    } else {

        // No "Content-type" field.
    }

    // Not decided at this level: try each child in turn, stopping at the
    // first one that reports success.
    bool found = false;

    for (size_t i = 0 ; !found && i < part.getBody()->getPartCount() ; ++i) {
        found = findPlainTextPart(*part.getBody()->getPartAt(i), parent, textPart);
    }

    return found;
}


// Returns the charset stored in this part (by reference).
const charset& htmlTextPart::getCharset() const {

    return m_charset;
}


// Replaces the charset stored in this part.
void htmlTextPart::setCharset(const charset& ch) {

    m_charset = ch;
}


// Returns the stored plain text contents (the pointer held by this object,
// not a copy of the contents).
shared_ptr htmlTextPart::getPlainText() const {

    return m_plainText;
}


// Stores a clone of 'plainText' as the plain text contents.
// 'plainText' is dereferenced unconditionally, so it must not be null.
void htmlTextPart::setPlainText(const shared_ptr & plainText) {

    m_plainText = plainText->clone();
}


// Returns the stored HTML text contents (the pointer held by this object,
// not a copy of the contents).
const shared_ptr htmlTextPart::getText() const {

    return m_text;
}


// Stores a clone of 'text' as the HTML text contents.
// 'text' is dereferenced unconditionally, so it must not be null.
void htmlTextPart::setText(const shared_ptr & text) {

    m_text = text->clone();
}


// Returns the number of embedded objects currently registered.
size_t htmlTextPart::getObjectCount() const {

    return m_objects.size();
}


// Returns the embedded object at index 'pos'.
// 'pos' is used to index m_objects directly; no range check is done here, so
// callers are expected to pass a value below getObjectCount().
shared_ptr htmlTextPart::getObjectAt(const size_t pos) const {

    return m_objects[pos];
}


// Returns the first embedded object for which matchesId(id) is true, or
// 'null' when there is none.
shared_ptr htmlTextPart::findObject(const string& id) const {

    for (std::vector >::const_iterator o = m_objects.begin() ;
         o != m_objects.end() ; ++o) {

        if ((*o)->matchesId(id)) {
            return *o;
        }
    }

    return null;
}


// Tells whether at least one embedded object matches 'id' (same matching
// rule as findObject()).
bool htmlTextPart::hasObject(const string& id) const {

    for (std::vector >::const_iterator o = m_objects.begin() ;
         o != m_objects.end() ; ++o) {

        if ((*o)->matchesId(id)) {
            return true;
        }
    }

    return false;
}


// Registers a new embedded object and returns it.
//
// Parameters:
//   data -- contents of the object (the embeddedObject constructor stores a
//           clone of it).
//   enc  -- encoding recorded for the object.
//   type -- media type recorded for the object.
//
// The object's id comes from messageId::generateId() and its reference type
// is REFERENCED_BY_ID.
shared_ptr htmlTextPart::addObject(
    const shared_ptr & data,
    const vmime::encoding& enc,
    const mediaType& type
) {

    const messageId mid(messageId::generateId());

    shared_ptr obj = make_shared (
        data, enc, mid.getId(), type, embeddedObject::REFERENCED_BY_ID
    );

    m_objects.push_back(obj);

    return obj;
}


// Convenience overload: same as the three-argument addObject(), with the
// encoding chosen by encoding::decide(data).
shared_ptr htmlTextPart::addObject(
    const shared_ptr & data,
    const mediaType& type
) {

    return addObject(data, encoding::decide(data), type);
}


// Convenience overload taking the object contents as a string.
// The string is wrapped in a newly created handler object, then registered
// like above with the encoding chosen by encoding::decide().
// NOTE(review): the handler type is not visible here; presumably a
// stringContentHandler (that header is included) -- TODO confirm.
shared_ptr htmlTextPart::addObject(
    const string& data,
    const mediaType& type
) {

    shared_ptr cts = make_shared (data);
    return addObject(cts, encoding::decide(cts), type);
}



//
// htmlTextPart::embeddedObject
//

// Builds an embedded object.
//
// Parameters:
//   data    -- contents; m_data holds the result of vmime::clone(data), not
//              the pointer passed in.
//   enc     -- encoding of the object.
//   id      -- identifier of the object (see getReferenceId() / matchesId()
//              for how it is interpreted depending on refType).
//   type    -- media type of the object.
//   refType -- REFERENCED_BY_ID or another ReferenceType value.
htmlTextPart::embeddedObject::embeddedObject(
    const shared_ptr & data,
    const encoding& enc,
    const string& id,
    const mediaType& type,
    const ReferenceType refType
)
    : m_data(vmime::clone(data)),
      m_encoding(enc),
      m_id(id),
      m_type(type),
      m_refType(refType) {

}


// Returns the object's contents (the pointer held by this object).
shared_ptr htmlTextPart::embeddedObject::getData() const {

    return m_data;
}


// Returns a copy of the object's encoding.
const vmime::encoding htmlTextPart::embeddedObject::getEncoding() const {

    return m_encoding;
}


// Returns the identifier exactly as stored at construction.
const string htmlTextPart::embeddedObject::getId() const {

    return m_id;
}


// Returns the string to use in the HTML text to refer to this object:
// "cid:" followed by the id when the object is REFERENCED_BY_ID, the id
// unchanged otherwise.
const string htmlTextPart::embeddedObject::getReferenceId() const {

    if (m_refType == REFERENCED_BY_ID) {
        return string("cid:") + m_id;
    } else {
        return m_id;
    }
}


// Returns a copy of the object's media type.
const mediaType htmlTextPart::embeddedObject::getType() const {

    return m_type;
}


// Returns how this object is referenced (value given at construction).
htmlTextPart::embeddedObject::ReferenceType htmlTextPart::embeddedObject::getReferenceType() const {

    return m_refType;
}


// Tells whether 'id' designates this object.
// For REFERENCED_BY_ID objects, a leading "cid:" prefix of 'id' is ignored
// (see cleanId()); for other reference types the comparison is exact.
// The comparison with the stored id is case-sensitive in both cases.
bool htmlTextPart::embeddedObject::matchesId(const string& id) const {

    if (m_refType == REFERENCED_BY_ID) {
        return m_id == cleanId(id);
    } else {
        return m_id == id;
    }
}


// Returns 'id' without its leading "cid:" prefix, if it has one.
// The prefix test accepts any mix of upper/lower case letters; an id shorter
// than four characters, or without the prefix, is returned unchanged.
// static
const string htmlTextPart::embeddedObject::cleanId(const string& id) {

    if (id.length() >= 4 &&
        (id[0] == 'c' || id[0] == 'C') &&
        (id[1] == 'i' || id[1] == 'I') &&
        (id[2] == 'd' || id[2] == 'D') &&
        id[3] == ':') {

        return id.substr(4);

    } else {

        return id;
    }
}


} // vmime