/**
 * @name EmailInbound (Mail Part Handler for POCO)
 * @description Receive and sort emails
 */

#include "MyPartHandler.h"

namespace
{
    /**
     * Appends the content of `in` to `out`, undoing the given
     * Content-Transfer-Encoding ("base64" or "quoted-printable",
     * compared case-insensitively). Any other value copies the stream verbatim.
     */
    void appendDecoded(std::istream& in, const string& transferEncoding, string& out)
    {
        const string encoding = String::ToLower(transferEncoding);
        if (encoding == "base64")
        {
            Poco::Base64Decoder base64Decoder(in);
            StreamCopier::copyToString(base64Decoder, out);
        }
        else if (encoding == "quoted-printable")
        {
            Poco::Net::QuotedPrintableDecoder qpDecoder(in);
            StreamCopier::copyToString(qpDecoder, out);
        }
        else
        {
            StreamCopier::copyToString(in, out);
        }
    }
}

/**
 * POCO::Net::PartHandler
 * Documentation: http://pocoproject.org/docs/
 *
 * Called for every part encountered during the processing of an email message.
 * For a multipart message, it has the following pattern:
 *   Content-Transfer-Encoding="quoted-printable"
 *   Content-Type="text/plain"; charset="us-ascii"
 *   ....
 *   Content-Transfer-Encoding="quoted-printable"
 *   Content-Type="text/html"; charset="us-ascii"
 *   <html>
 *   ....
 *   </html>
 *
 * Side effects on this handler:
 *  - the serialized part header is appended to the header list;
 *  - attachments are appended to the filename/attachment lists; the fallback
 *    (Content-Type "name=") paths only ever add both entries together, so they
 *    cannot push the two lists out of step;
 *  - the text/plain content is appended to the body (text/html is ignored).
 *
 * @param messageHeader  header of the part being processed
 * @param stream         content of the part
 */
void MyPartHandler::handlePart(const MessageHeader& messageHeader, std::istream& stream)
{
    stringstream headerSS;
    messageHeader.write(headerSS);
    _headers.push_back(headerSS.str());

    if (messageHeader.has("Content-Disposition"))
    {
        // If there is any file attachment, append the filename and attachment to vectors
        string disp;
        NameValueCollection params;
        MessageHeader::splitParameters(messageHeader["Content-Disposition"], disp, params);
        const string filename = params.get("filename", "nil");
        if (filename != "nil")
        {
            // Filename might be encoded in Base64 or QuotedPrintable
            string attachment;
            _filenames.push_back(DecodeString(filename));
            StreamCopier::copyToString(stream, attachment);
            _attachments.push_back(attachment);
        }
    }

    string contentType = messageHeader.get("Content-Type", "nil");
    // Lower-cased copy, used for media type comparisons only
    const string lcContentType = String::ToLower(contentType);

    if (lcContentType.find("multipart") == 0)
    {
        MultipartReader multipartReader(stream);
        while (multipartReader.hasNextPart())
        {
            MessageHeader subMessageHeader;
            multipartReader.nextPart(subMessageHeader);
            string subContentType = subMessageHeader.get("Content-Type", "nil");
            // Convert to lower case for comparison only
            const string lc_subctype = String::ToLower(subContentType);

            // Priority is text/plain format; text/html is currently ignored
            if (lc_subctype == "nil")
            {
                continue;
            }
            else if (lc_subctype.find("application") != string::npos
                  && lc_subctype.find("name") != string::npos)
            {
                // Save attachment(s) found in a sub-content part.
                // The parameters are split from the ORIGINAL header value: parameter names are
                // matched case-insensitively by NameValueCollection, while the file name itself
                // (possibly a base64 encoded-word) must keep its case.
                string mediaType;
                NameValueCollection params;
                MessageHeader::splitParameters(subContentType, mediaType, params);
                const string filename = params.get("name", "nil");
                if (filename != "nil")
                {
                    // Filename and attachment might be encoded in Base64 or QuotedPrintable
                    string attachment;
                    appendDecoded(multipartReader.stream(),
                                  subMessageHeader.get("Content-Transfer-Encoding", "nil"),
                                  attachment);
                    // Record name and content together so both vectors stay index-aligned
                    if (!attachment.empty())
                    {
                        _filenames.push_back(DecodeString(filename));
                        _attachments.push_back(attachment);
                    }
                }
            }
            else if (lc_subctype.find("boundary") != string::npos)
            {
                // Remember the boundary of the first nested multipart seen, starting at its first '_'
                // NOTE(review): if the value has no '_', find() yields npos and bStart becomes
                // negative; the behaviour then depends on String::FixField - confirm.
                int bStart = 0;
                if (_myboundary.empty())
                {
                    bStart = subContentType.find('_');
                    _myboundary = String::FixField(subContentType, bStart, (subContentType.length() - (bStart + 1)));
                }
            }
            else if (lc_subctype.find("text/plain") == 0)
            {
                string charset;
                if (subContentType.find("charset") != string::npos)
                {
                    // Outlook: Content-Type text/plain charset="us-ascii"
                    // Yahoo:   Content-Type text/plain charset=iso-8859-1
                    string subct_clean = String::RemoveChar(subContentType, '"');
                    int charpos = subct_clean.find("charset=") + 8; // +8 to bypass the word "charset="
                    charset = String::FixField(subct_clean, charpos, (subContentType.length() - charpos));
                }

                const string cte = subMessageHeader.get("Content-Transfer-Encoding", "nil");
                // For some reason, emails from Outlook (content transfer encoding is specified as
                // quoted-printable in the header) produce a nil result in QuotedPrintableDecoder,
                // so us-ascii content is copied without decoding.
                if (charset.compare("us-ascii") != 0)
                {
                    appendDecoded(multipartReader.stream(), cte, _body);
                }
                else
                {
                    StreamCopier::copyToString(multipartReader.stream(), _body);
                }

                if (!_myboundary.empty() && _myboundary.compare(multipartReader.boundary()) != 0)
                {
                    // Cut the body at the nested boundary marker.
                    // -2 for the boundary heading, e.g. --_000_OD67Eexchau_
                    // NOTE(review): assumes the marker occurs in the body at offset >= 2 - confirm.
                    _body = String::Trim(String::FixField(_body, 0, (_body.find(_myboundary) - 2)));
                }
            }
            else
            {
                // Any other part type ends the scan. Using "continue" here hits the error
                // "Malformed message: Field value too long/no CRLF found" under
                // MessageHeader.read() in MessageHeader.cpp.
                // The "text/plain" part will always come before the "text/html" part.
                //
                // Kept for reference: retrieving the text/html content (ignored at this moment)
                /*
                else if(subContentType.find("text/html") == 0)
                {
                    string cte = subMessageHeader.get("Content-Transfer-Encoding", "nil");
                    if(cte == "base64")
                    {
                        Poco::Base64Decoder base64Decoder(multipartReader.stream());
                        StreamCopier::copyToString(base64Decoder, _body);
                    }
                    else if(cte == "quoted-printable")
                    {
                        Poco::Net::QuotedPrintableDecoder qpDecoder(multipartReader.stream());
                        StreamCopier::copyToString(qpDecoder, _body);
                    }
                    else
                        StreamCopier::copyToString(stream, _body);
                */
                break;
            }
        }
    }
    else if (lcContentType.find("application") != string::npos
          && lcContentType.find("name") != string::npos)
    {
        // Some oddball emails don't have a Content-Disposition clause even though they have attachments.
        // Decoding is not necessary at top level as POCO will do it automatically. weird...need more testing
        string mediaType;
        NameValueCollection params;
        // Split the original (not lower-cased) value so the file name keeps its case
        MessageHeader::splitParameters(contentType, mediaType, params);
        const string filename = params.get("name", "nil");
        if (filename != "nil")
        {
            string attachment;
            StreamCopier::copyToString(stream, attachment);
            // The stream is already drained when the Content-Disposition branch above stored this
            // part; skipping the empty result avoids recording the same file name twice.
            if (!attachment.empty())
            {
                _filenames.push_back(DecodeString(filename));
                _attachments.push_back(attachment);
            }
        }
    }
    else
    {
        // Email body content
        // Change request 20101007: Ignore text/html part
        if (lcContentType.find("text/html") == string::npos)
        {
            StreamCopier::copyToString(stream, _body);
        }
    }
}

/** @return the serialized headers of every part handled so far. */
const vector<string>& MyPartHandler::GetHeaders()
{
    return _headers;
}

/** @return the collected message body text. */
const string& MyPartHandler::GetBody()
{
    return _body;
}

/** @return the decoded file names of the collected attachments. */
const vector<string>& MyPartHandler::GetFilenames()
{
    return _filenames;
}

/** @return the content of the collected attachments. */
const vector<string>& MyPartHandler::GetAttachments()
{
    return _attachments;
}

/**
 * Decodes a header phrase (email subject, attachment file name) made of encoded words.
 * It can decode mixed languages within one string.
 * For example (mix of english and ukrainian):
 *   charset="windows-1251"
 *   =?windows-1251?Q?outlook:_testing_with_english_text....and_ukrainian_=EA?= =?windows-1251?B?7u3q8/DxIOTw4Oru7bPi8fzq6PUg9+7i7bPi?=
 *
 * An encoded word looks like =?gb2312?B?ztLKc3re4==?= : it is enclosed in =? ?= quotes and
 * holds the charset, the encoding (B stands for 'base64', Q stands for 'quoted-printable')
 * and the encoded text, separated by '?'.
 *
 * @param phrase  raw header value
 * @return the phrase converted to UTF-8; the phrase unchanged when it is blank or is not
 *         enclosed in =? ... ?=; an empty string when a charset conversion reports an error.
 */
string MyPartHandler::DecodeString(string phrase)
{
    if (String::Trim(phrase).length() == 0)
    {
        return phrase;
    }
    if (!String::IsBeginWith(phrase, "=?") || !String::IsEndWith(phrase, "?="))
    {
        return phrase;
    }

    string utf8Phrase;
    size_t cursor = 0;
    while (cursor < phrase.length())
    {
        // Locate the delimiters of the next encoded word in order: =? charset ? encoding ? text ?=
        // Searching each one after the previous keeps "=" characters inside the encoded text
        // (Q escapes such as "?Q?=E4", base64 padding such as "==?=") from being taken for delimiters.
        const size_t wordStart = phrase.find("=?", cursor);
        if (wordStart == string::npos)
        {
            break;
        }
        const size_t charsetEnd = phrase.find('?', wordStart + 2);
        const size_t encodingEnd = (charsetEnd == string::npos) ? string::npos : phrase.find('?', charsetEnd + 1);
        const size_t textEnd = (encodingEnd == string::npos) ? string::npos : phrase.find("?=", encodingEnd + 1);
        if (textEnd == string::npos)
        {
            break; // malformed remainder, appended verbatim below
        }

        // Whitespace that only separates two encoded words is dropped; other text is kept as is
        const string gap = phrase.substr(cursor, wordStart - cursor);
        if (gap.find_first_not_of(" \t\r\n") != string::npos)
        {
            utf8Phrase += gap;
        }

        string textEncoding = phrase.substr(wordStart + 2, charsetEnd - (wordStart + 2));
        const string encodedType = phrase.substr(charsetEnd + 1, encodingEnd - (charsetEnd + 1));
        string encodedString = phrase.substr(encodingEnd + 1, textEnd - (encodingEnd + 1));

        string decodedPhrase;
        if (encodedType == "B" || encodedType == "b")
        {
            istringstream iss(encodedString);
            Poco::Base64Decoder base64Decoder(iss);
            StreamCopier::copyToString(base64Decoder, decodedPhrase);
        }
        else if (encodedType == "Q" || encodedType == "q")
        {
            // Q encoding writes a space as an underscore. Revert it BEFORE decoding so that a
            // literal underscore encoded as =5F survives.
            std::replace(encodedString.begin(), encodedString.end(), '_', ' ');
            istringstream iss(encodedString);
            Poco::Net::QuotedPrintableDecoder qpDecoder(iss);
            StreamCopier::copyToString(qpDecoder, decodedPhrase);
        }
        else
        {
            // Unknown encoding: keep the whole encoded word (safety measure)
            decodedPhrase = phrase.substr(wordStart, textEnd + 2 - wordStart);
        }

        const string lcTextEncoding = String::ToLower(textEncoding);
        if (lcTextEncoding != "utf-8")
        {
            string errorMessage;
            string convertedPhrase;
            // Microsoft Outlook 2007 cannot differentiate between simplified and traditional
            // chinese for email subject. It will only list the content type as GB2312, thus we
            // need to do a conversion.
            if (lcTextEncoding == "gb2312")
            {
                String::ConvertTextEncoding("GBK", "UTF-8", decodedPhrase, convertedPhrase, errorMessage);
            }
            else
            {
                String::ConvertTextEncoding(textEncoding, "UTF-8", decodedPhrase, convertedPhrase, errorMessage);
            }

            if (errorMessage.length() > 0)
            {
                return "";
            }
            utf8Phrase += convertedPhrase;
        }
        else
        {
            utf8Phrase += decodedPhrase;
        }

        cursor = textEnd + 2; // continue right after the closing ?=
    }

    // Keep whatever could not be parsed as an encoded word
    if (cursor < phrase.length())
    {
        utf8Phrase += phrase.substr(cursor);
    }
    return utf8Phrase;
}

/** Resets the handler so it can process another message. */
void MyPartHandler::CleanUp()
{
    _body.clear();
    _myboundary.clear();
    _headers.clear();
    _filenames.clear();
    _attachments.clear();
}