blob: 57a12c414b0450685b40fe0dd9e030225d5c5c71 [file] [log] [blame]
/**************************************************************
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*************************************************************/
// MARKER(update_precomp.py): autogen include statement, do not remove
#include "precompiled_svl.hxx"
#include <tools/inetmime.hxx>
#include <svl/adrparse.hxx>
namespace unnamed_svl_adrparse {}
using namespace unnamed_svl_adrparse;
// unnamed namespaces don't work well yet
//============================================================================
namespace unnamed_svl_adrparse {

// Kind of the element that was added last to an addr-spec candidate.  The
// numeric order of the enumerators matters: ELEMENT_ITEM and ELEMENT_END are
// the two largest values (see ParsedAddrSpec::isPoorlyValid()).
enum ElementType { ELEMENT_START, ELEMENT_DELIM, ELEMENT_ITEM, ELEMENT_END };

//============================================================================
// Bookkeeping for one addr-spec candidate: the input range it covers, the
// kind of its last element, and two flags (an '@' has been seen / the text
// has to be rebuilt instead of being copied verbatim from the input).
struct ParsedAddrSpec
{
    sal_Unicode const * m_pBegin;
    sal_Unicode const * m_pEnd;
    ElementType m_eLastElem;
    bool m_bAtFound;
    bool m_bReparse;

    ParsedAddrSpec() { reset(); }

    // True if the candidate ends in an item or has already been finished.
    bool isPoorlyValid() const
    {
        return m_eLastElem == ELEMENT_ITEM || m_eLastElem == ELEMENT_END;
    }

    // True if the candidate is poorly valid and additionally contains an '@'.
    bool isValid() const { return m_bAtFound && isPoorlyValid(); }

    inline void reset();

    inline void finish();
};

// Puts the candidate back into its initial, empty state.
inline void ParsedAddrSpec::reset()
{
    m_pBegin = m_pEnd = 0;
    m_bAtFound = m_bReparse = false;
    m_eLastElem = ELEMENT_START;
}

// Closes the candidate: a poorly valid one is marked as finished
// (ELEMENT_END), anything else is thrown away.
inline void ParsedAddrSpec::finish()
{
    if (!isPoorlyValid())
    {
        reset();
        return;
    }
    m_eLastElem = ELEMENT_END;
}

}
//============================================================================
// Helper that performs the actual parsing for SvAddressParser.  All work is
// done inside the constructor, which writes its results into the
// SvAddressParser passed to it; the object itself only holds the scanner and
// parser state while that constructor runs.
class SvAddressParser_Impl
{
// Position of the parser relative to an optional "name:" prefix and an
// optional "<...>" part of the entry currently being parsed.
enum State { BEFORE_COLON, BEFORE_LESS, AFTER_LESS, AFTER_GREATER };
// Multi-character token kinds.  readToken() stores either one of these or
// the special character itself in m_nCurToken; the values start at
// 0x80000000, presumably so that they cannot collide with a character code.
enum TokenType { TOKEN_QUOTED = 0x80000000, TOKEN_DOMAIN, TOKEN_COMMENT,
TOKEN_ATOM };
// Current read position and end of the input buffer.
sal_Unicode const * m_pInputPos;
sal_Unicode const * m_pInputEnd;
// Token most recently produced by readToken(): a TokenType value or a
// single special character.
sal_uInt32 m_nCurToken;
// Extent of the current token in the input.  readToken() does not set the
// end pointer for comment tokens.
sal_Unicode const * m_pCurTokenBegin;
sal_Unicode const * m_pCurTokenEnd;
// Extent of the token's content: for quoted strings the text between the
// quotes, for comments the text without leading/trailing blanks (both
// pointers are null for a comment without such text).
sal_Unicode const * m_pCurTokenContentBegin;
sal_Unicode const * m_pCurTokenContentEnd;
// Set by readToken() when the token contains a backslash escape, i.e. its
// content cannot be copied verbatim.
bool m_bCurTokenReparse;
// Addr-spec candidates found outside and inside of "<...>", and a pointer to
// the one tokens are currently added to.
ParsedAddrSpec m_aOuterAddrSpec;
ParsedAddrSpec m_aInnerAddrSpec;
ParsedAddrSpec * m_pAddrSpec;
// Raw extent of the real name (as passed to reparse()) and the extent that
// may be copied verbatim when no reparse is needed.
sal_Unicode const * m_pRealNameBegin;
sal_Unicode const * m_pRealNameEnd;
sal_Unicode const * m_pRealNameContentBegin;
sal_Unicode const * m_pRealNameContentEnd;
bool m_bRealNameReparse;
// Once set, further tokens are no longer added to the real name.
bool m_bRealNameFinished;
// Content of the first comment seen; used as a fallback real name.
sal_Unicode const * m_pFirstCommentBegin;
sal_Unicode const * m_pFirstCommentEnd;
bool m_bFirstCommentReparse;
State m_eState;
// Kind of token readToken() scans next: TOKEN_ATOM for ordinary scanning,
// or TOKEN_QUOTED / TOKEN_DOMAIN / TOKEN_COMMENT after the corresponding
// opening character has been read.
TokenType m_eType;
inline void resetRealNameAndFirstComment();
inline void reset();
inline void addTokenToAddrSpec(ElementType eTokenElem);
inline void addTokenToRealName();
bool readToken();
static UniString reparse(sal_Unicode const * pBegin,
sal_Unicode const * pEnd, bool bAddrSpec);
static UniString reparseComment(sal_Unicode const * pBegin,
sal_Unicode const * pEnd);
public:
// Parses rInput and stores the resulting entries in *pParser.
SvAddressParser_Impl(SvAddressParser * pParser, UniString const & rInput);
};
// Forgets everything collected so far about the real name and about the
// first comment of the current entry.
inline void SvAddressParser_Impl::resetRealNameAndFirstComment()
{
    // real name
    m_pRealNameBegin = m_pRealNameEnd = 0;
    m_pRealNameContentBegin = m_pRealNameContentEnd = 0;
    m_bRealNameReparse = m_bRealNameFinished = false;

    // first comment
    m_pFirstCommentBegin = m_pFirstCommentEnd = 0;
    m_bFirstCommentReparse = false;
}
// Prepares the parser for the next entry: scanner mode and state go back to
// their start values, both addr-spec candidates are emptied (the outer one
// becomes the current one), and real name and first comment are cleared.
inline void SvAddressParser_Impl::reset()
{
    m_eType = TOKEN_ATOM;
    m_eState = BEFORE_COLON;

    m_aInnerAddrSpec.reset();
    m_aOuterAddrSpec.reset();
    m_pAddrSpec = &m_aOuterAddrSpec;

    resetRealNameAndFirstComment();
}
// Extends the current addr-spec candidate by the current token and records
// eTokenElem as the kind of its last element.
inline void SvAddressParser_Impl::addTokenToAddrSpec(ElementType eTokenElem)
{
    ParsedAddrSpec & rSpec = *m_pAddrSpec;
    if (rSpec.m_pBegin == 0)
    {
        // first token of the candidate
        rSpec.m_pBegin = m_pCurTokenBegin;
    }
    else if (m_pCurTokenBegin > rSpec.m_pEnd)
    {
        // Something lies between the previous token and this one, so the
        // candidate cannot be copied verbatim from the input any more.
        rSpec.m_bReparse = true;
    }
    rSpec.m_pEnd = m_pCurTokenEnd;
    rSpec.m_eLastElem = eTokenElem;
}
inline void SvAddressParser_Impl::addTokenToRealName()
{
if (!m_bRealNameFinished && m_eState != AFTER_LESS)
{
if (!m_pRealNameBegin)
m_pRealNameBegin = m_pRealNameContentBegin = m_pCurTokenBegin;
else if (m_pRealNameEnd < m_pCurTokenBegin - 1
|| (m_pRealNameEnd == m_pCurTokenBegin - 1
&& *m_pRealNameEnd != ' '))
m_bRealNameReparse = true;
m_pRealNameEnd = m_pRealNameContentEnd = m_pCurTokenEnd;
}
}
//============================================================================
//
// SvAddressParser_Impl
//
//============================================================================
bool SvAddressParser_Impl::readToken()
{
m_nCurToken = m_eType;
m_bCurTokenReparse = false;
switch (m_eType)
{
case TOKEN_QUOTED:
{
m_pCurTokenBegin = m_pInputPos - 1;
m_pCurTokenContentBegin = m_pInputPos;
bool bEscaped = false;
for (;;)
{
if (m_pInputPos >= m_pInputEnd)
return false;
sal_Unicode cChar = *m_pInputPos++;
if (bEscaped)
{
m_bCurTokenReparse = true;
bEscaped = false;
}
else if (cChar == '"')
{
m_pCurTokenEnd = m_pInputPos;
m_pCurTokenContentEnd = m_pInputPos - 1;
return true;
}
else if (cChar == '\\')
bEscaped = true;
}
}
case TOKEN_DOMAIN:
{
m_pCurTokenBegin = m_pInputPos - 1;
m_pCurTokenContentBegin = m_pInputPos;
bool bEscaped = false;
for (;;)
{
if (m_pInputPos >= m_pInputEnd)
return false;
sal_Unicode cChar = *m_pInputPos++;
if (bEscaped)
bEscaped = false;
else if (cChar == ']')
{
m_pCurTokenEnd = m_pInputPos;
return true;
}
else if (cChar == '\\')
bEscaped = true;
}
}
case TOKEN_COMMENT:
{
m_pCurTokenBegin = m_pInputPos - 1;
m_pCurTokenContentBegin = 0;
m_pCurTokenContentEnd = 0;
bool bEscaped = false;
xub_StrLen nLevel = 0;
for (;;)
{
if (m_pInputPos >= m_pInputEnd)
return false;
sal_Unicode cChar = *m_pInputPos++;
if (bEscaped)
{
m_bCurTokenReparse = true;
m_pCurTokenContentEnd = m_pInputPos;
bEscaped = false;
}
else if (cChar == '(')
{
if (!m_pCurTokenContentBegin)
m_pCurTokenContentBegin = m_pInputPos - 1;
m_pCurTokenContentEnd = m_pInputPos;
++nLevel;
}
else if (cChar == ')')
if (nLevel)
{
m_pCurTokenContentEnd = m_pInputPos;
--nLevel;
}
else
return true;
else if (cChar == '\\')
{
if (!m_pCurTokenContentBegin)
m_pCurTokenContentBegin = m_pInputPos - 1;
bEscaped = true;
}
else if (cChar > ' ' && cChar != 0x7F) // DEL
{
if (!m_pCurTokenContentBegin)
m_pCurTokenContentBegin = m_pInputPos - 1;
m_pCurTokenContentEnd = m_pInputPos;
}
}
}
default:
{
sal_Unicode cChar;
for (;;)
{
if (m_pInputPos >= m_pInputEnd)
return false;
cChar = *m_pInputPos++;
if (cChar > ' ' && cChar != 0x7F) // DEL
break;
}
m_pCurTokenBegin = m_pInputPos - 1;
if (cChar == '"' || cChar == '(' || cChar == ')' || cChar == ','
|| cChar == '.' || cChar == ':' || cChar == ';'
|| cChar == '<' || cChar == '>' || cChar == '@'
|| cChar == '[' || cChar == '\\' || cChar == ']')
{
m_nCurToken = cChar;
m_pCurTokenEnd = m_pInputPos;
return true;
}
else
for (;;)
{
if (m_pInputPos >= m_pInputEnd)
{
m_pCurTokenEnd = m_pInputPos;
return true;
}
cChar = *m_pInputPos++;
if (cChar <= ' ' || cChar == '"' || cChar == '('
|| cChar == ')' || cChar == ',' || cChar == '.'
|| cChar == ':' || cChar == ';' || cChar == '<'
|| cChar == '>' || cChar == '@' || cChar == '['
|| cChar == '\\' || cChar == ']'
|| cChar == 0x7F) // DEL
{
m_pCurTokenEnd = --m_pInputPos;
return true;
}
}
}
}
}
//============================================================================
// static
// Rebuilds the text of an addr-spec or of a real name from the raw input
// range [pBegin, pEnd).
//
// Comments are always dropped.  For an addr-spec (bAddrSpec == true) blanks
// and control characters outside of quoted strings and domain literals are
// dropped, and quotes and backslashes are kept.  For a real name
// (bAddrSpec == false) the quotes of quoted strings and escaping backslashes
// are removed, and every run of blanks, control characters and comments is
// replaced by a single space.  The brackets of a domain literal are kept in
// both cases.
//
// @param pBegin     start of the raw range
// @param pEnd       end of the raw range (exclusive)
// @param bAddrSpec  true to produce an addr-spec, false to produce a real
//                   name
// @return the rebuilt text
UniString SvAddressParser_Impl::reparse(sal_Unicode const * pBegin,
                                        sal_Unicode const * pEnd,
                                        bool bAddrSpec)
{
    UniString aResult;
    TokenType eMode = TOKEN_ATOM;
    bool bEscaped = false;
    bool bEndsWithSpace = false;
    xub_StrLen nLevel = 0;
    while (pBegin < pEnd)
    {
        sal_Unicode cChar = *pBegin++;
        switch (eMode)
        {
            case TOKEN_QUOTED:
                if (bEscaped)
                {
                    aResult += cChar;
                    bEscaped = false;
                }
                else if (cChar == '"')
                {
                    if (bAddrSpec)
                        aResult += cChar;
                    eMode = TOKEN_ATOM;
                }
                else if (cChar == '\\')
                {
                    if (bAddrSpec)
                        aResult += cChar;
                    bEscaped = true;
                }
                else
                    aResult += cChar;
                break;

            case TOKEN_DOMAIN:
                if (bEscaped)
                {
                    aResult += cChar;
                    bEscaped = false;
                }
                else if (cChar == ']')
                {
                    aResult += cChar;
                    eMode = TOKEN_ATOM;
                }
                else if (cChar == '\\')
                {
                    if (bAddrSpec)
                        aResult += cChar;
                    bEscaped = true;
                }
                else
                    aResult += cChar;
                break;

            case TOKEN_COMMENT:
                // Comment text is skipped; only nesting and escapes are
                // tracked in order to find the matching ')'.
                if (bEscaped)
                    bEscaped = false;
                else if (cChar == '(')
                    ++nLevel;
                else if (cChar == ')')
                {
                    if (nLevel)
                        --nLevel;
                    else
                        eMode = TOKEN_ATOM;
                }
                else if (cChar == '\\')
                    bEscaped = true;
                break;

            case TOKEN_ATOM:
                if (cChar <= ' ' || cChar == 0x7F) // DEL
                {
                    if (!bAddrSpec && !bEndsWithSpace)
                    {
                        aResult += ' ';
                        bEndsWithSpace = true;
                    }
                }
                else if (cChar == '(')
                {
                    if (!bAddrSpec && !bEndsWithSpace)
                    {
                        aResult += ' ';
                        bEndsWithSpace = true;
                    }
                    eMode = TOKEN_COMMENT;
                }
                else
                {
                    bEndsWithSpace = false;
                    if (cChar == '"')
                    {
                        if (bAddrSpec)
                            aResult += cChar;
                        eMode = TOKEN_QUOTED;
                    }
                    else if (cChar == '[')
                    {
                        aResult += cChar;
                        // A '[' opens a domain literal, which ends at the
                        // next unescaped ']' (not at a '"').
                        eMode = TOKEN_DOMAIN;
                    }
                    else
                        aResult += cChar;
                }
                break;
        }
    }
    return aResult;
}
//============================================================================
// static
// Returns the comment content in [pBegin, pEnd) with backslash escapes
// resolved: every backslash is dropped and the character following it is
// copied literally.
//
// A backslash that is the very last character of the range has nothing to
// escape; it is copied as is instead of reading beyond pEnd.  (The ranges
// recorded by readToken() never end that way, so this is purely a safety
// net.)
UniString SvAddressParser_Impl::reparseComment(sal_Unicode const * pBegin,
                                               sal_Unicode const * pEnd)
{
    UniString aResult;
    while (pBegin < pEnd)
    {
        sal_Unicode cChar = *pBegin++;
        if (cChar == '\\' && pBegin < pEnd)
            cChar = *pBegin++;
        aResult += cChar;
    }
    return aResult;
}
//============================================================================
// Runs the whole parse.  rInput is split into tokens by readToken(), and
// every token drives a small state machine (m_eState) that keeps track of two
// addr-spec candidates (outside and inside of "<...>"), of a real name and of
// the first comment.  Each ',' or ';' outside of "<...>", and the end of the
// input, produces at most one entry: the first entry is stored in
// pParser->m_aFirst (and pParser->m_bHasFirst is set), later ones are
// appended to pParser->m_aRest.
//
// pParser is dereferenced without a check, so it must not be null.
SvAddressParser_Impl::SvAddressParser_Impl(SvAddressParser * pParser,
UniString const & rInput)
{
m_pInputPos = rInput.GetBuffer();
m_pInputEnd = m_pInputPos + rInput.Len();
reset();
// Set once the end of the input has been turned into the final ','.
bool bDone = false;
for (;;)
{
if (!readToken())
{
// The input is exhausted (possibly in the middle of a quoted string, a
// domain literal or a comment).  Substitute the token that closes the
// current construct: a '>' while inside "<...>", otherwise a final ','
// that flushes the last entry and ends the loop.
m_bRealNameFinished = true;
if (m_eState == AFTER_LESS)
m_nCurToken = '>';
else
{
m_nCurToken = ',';
bDone = true;
}
}
switch (m_nCurToken)
{
case TOKEN_QUOTED:
if (m_pAddrSpec->m_eLastElem != ELEMENT_END)
{
// Start the candidate over if an '@' has already been seen or if its
// last element is ELEMENT_START or ELEMENT_DELIM, then add the quoted
// string as an item.
if (m_pAddrSpec->m_bAtFound
|| m_pAddrSpec->m_eLastElem <= ELEMENT_DELIM)
m_pAddrSpec->reset();
addTokenToAddrSpec(ELEMENT_ITEM);
}
if (!m_bRealNameFinished && m_eState != AFTER_LESS)
{
if (m_bCurTokenReparse)
{
// The quoted string contains escapes: the real name has to be rebuilt
// from its raw range.
if (!m_pRealNameBegin)
m_pRealNameBegin = m_pCurTokenBegin;
m_pRealNameEnd = m_pCurTokenEnd;
m_bRealNameReparse = true;
}
else if (m_bRealNameReparse)
m_pRealNameEnd = m_pCurTokenEnd;
else if (!m_pRealNameBegin)
{
// The quoted string is the first part of the real name: remember its
// content (without the quotes) so that it can be copied verbatim.
m_pRealNameBegin = m_pCurTokenBegin;
m_pRealNameContentBegin = m_pCurTokenContentBegin;
m_pRealNameEnd = m_pRealNameContentEnd
= m_pCurTokenContentEnd;
}
else
{
// A quoted string following other real name tokens always forces a
// reparse.
m_pRealNameEnd = m_pCurTokenEnd;
m_bRealNameReparse = true;
}
}
// Back to ordinary scanning.
m_eType = TOKEN_ATOM;
break;
case TOKEN_DOMAIN:
if (m_pAddrSpec->m_eLastElem != ELEMENT_END)
{
// A domain literal is only accepted after an '@' has been seen and
// directly after a delimiter; anywhere else it discards the candidate.
if (m_pAddrSpec->m_bAtFound
&& m_pAddrSpec->m_eLastElem == ELEMENT_DELIM)
addTokenToAddrSpec(ELEMENT_ITEM);
else
m_pAddrSpec->reset();
}
addTokenToRealName();
m_eType = TOKEN_ATOM;
break;
case TOKEN_COMMENT:
// Remember the first comment that has content, as long as the real name
// is still open and the parser is not inside "<...>".
if (!m_bRealNameFinished && m_eState != AFTER_LESS
&& !m_pFirstCommentBegin && m_pCurTokenContentBegin)
{
m_pFirstCommentBegin = m_pCurTokenContentBegin;
m_pFirstCommentEnd = m_pCurTokenContentEnd;
m_bFirstCommentReparse = m_bCurTokenReparse;
}
m_eType = TOKEN_ATOM;
break;
case TOKEN_ATOM:
if (m_pAddrSpec->m_eLastElem != ELEMENT_END)
{
// An atom that does not directly follow a delimiter starts a new
// candidate.
if (m_pAddrSpec->m_eLastElem != ELEMENT_DELIM)
m_pAddrSpec->reset();
addTokenToAddrSpec(ELEMENT_ITEM);
}
addTokenToRealName();
break;
case '(':
// Let the next readToken() call scan the rest of the comment.
m_eType = TOKEN_COMMENT;
break;
case ')':
case '\\':
case ']':
// Special characters without a matching opening construct: they end the
// current candidate and become part of the real name.
m_pAddrSpec->finish();
addTokenToRealName();
break;
case '<':
switch (m_eState)
{
case BEFORE_COLON:
case BEFORE_LESS:
// Start of the "<...>" part: from now on tokens go to the inner
// candidate, and a real name collected so far is complete.
m_aOuterAddrSpec.finish();
if (m_pRealNameBegin)
m_bRealNameFinished = true;
m_pAddrSpec = &m_aInnerAddrSpec;
m_eState = AFTER_LESS;
break;
case AFTER_LESS:
m_aInnerAddrSpec.finish();
break;
case AFTER_GREATER:
m_aOuterAddrSpec.finish();
addTokenToRealName();
break;
}
break;
case '>':
if (m_eState == AFTER_LESS)
{
// End of the "<...>" part.  If it contained a valid addr-spec, the outer
// candidate is marked as finished so that later tokens are not added to
// it.
m_aInnerAddrSpec.finish();
if (m_aInnerAddrSpec.isValid())
m_aOuterAddrSpec.m_eLastElem = ELEMENT_END;
m_pAddrSpec = &m_aOuterAddrSpec;
m_eState = AFTER_GREATER;
}
else
{
m_aOuterAddrSpec.finish();
addTokenToRealName();
}
break;
case '@':
if (m_pAddrSpec->m_eLastElem != ELEMENT_END)
{
// Only the first '@', and only directly after an item, is accepted as a
// delimiter; any other '@' discards the candidate.
if (!m_pAddrSpec->m_bAtFound
&& m_pAddrSpec->m_eLastElem == ELEMENT_ITEM)
{
addTokenToAddrSpec(ELEMENT_DELIM);
m_pAddrSpec->m_bAtFound = true;
}
else
m_pAddrSpec->reset();
}
addTokenToRealName();
break;
case ',':
case ';':
if (m_eState == AFTER_LESS)
// Inside "<...>" these characters do not end the entry: a ',' discards an
// unfinished inner candidate, a ';' finishes it.
if (m_nCurToken == ',')
{
if (m_aInnerAddrSpec.m_eLastElem
!= ELEMENT_END)
m_aInnerAddrSpec.reset();
}
else
m_aInnerAddrSpec.finish();
else
{
// End of an entry.  Choose the addr-spec to report: the inner candidate
// if it is valid, or if it is at least poorly valid while the outer one is
// not valid; otherwise the outer candidate if it is poorly valid;
// otherwise none (no entry is produced).
m_pAddrSpec = m_aInnerAddrSpec.isValid()
|| (!m_aOuterAddrSpec.isValid()
&& m_aInnerAddrSpec.isPoorlyValid()) ?
&m_aInnerAddrSpec :
m_aOuterAddrSpec.isPoorlyValid() ?
&m_aOuterAddrSpec : 0;
if (m_pAddrSpec)
{
// Obtain the addr-spec text, either rebuilt by reparse() or copied
// verbatim from the input.
UniString aTheAddrSpec;
if (m_pAddrSpec->m_bReparse)
aTheAddrSpec = reparse(m_pAddrSpec->m_pBegin,
m_pAddrSpec->m_pEnd, true);
else
{
xub_StrLen nLen =
sal::static_int_cast< xub_StrLen >(
m_pAddrSpec->m_pEnd
- m_pAddrSpec->m_pBegin);
// If the range covers the whole input, the input string itself is
// assigned instead of a copy of a part of it.
if (nLen == rInput.Len())
aTheAddrSpec = rInput;
else
aTheAddrSpec
= rInput.Copy(
sal::static_int_cast< xub_StrLen >(
m_pAddrSpec->m_pBegin
- rInput.GetBuffer()),
nLen);
}
// Obtain the real name.  If there is no real name, or if the real name
// is exactly the reported outer addr-spec and a comment is available, the
// first comment is used (or, without any comment, the addr-spec itself).
UniString aTheRealName;
if (!m_pRealNameBegin
|| (m_pAddrSpec == &m_aOuterAddrSpec
&& m_pRealNameBegin
== m_aOuterAddrSpec.m_pBegin
&& m_pRealNameEnd == m_aOuterAddrSpec.m_pEnd
&& m_pFirstCommentBegin))
if (!m_pFirstCommentBegin)
aTheRealName = aTheAddrSpec;
else if (m_bFirstCommentReparse)
aTheRealName
= reparseComment(m_pFirstCommentBegin,
m_pFirstCommentEnd);
else
aTheRealName
= rInput.Copy(
sal::static_int_cast< xub_StrLen >(
m_pFirstCommentBegin
- rInput.GetBuffer()),
sal::static_int_cast< xub_StrLen >(
m_pFirstCommentEnd
- m_pFirstCommentBegin));
else if (m_bRealNameReparse)
aTheRealName = reparse(m_pRealNameBegin,
m_pRealNameEnd, false);
else
{
xub_StrLen nLen =
sal::static_int_cast< xub_StrLen >(
m_pRealNameContentEnd
- m_pRealNameContentBegin);
if (nLen == rInput.Len())
aTheRealName = rInput;
else
aTheRealName
= rInput.Copy(
sal::static_int_cast< xub_StrLen >(
m_pRealNameContentBegin
- rInput.GetBuffer()),
nLen);
}
// Hand the entry over to the SvAddressParser: the first one is stored
// in m_aFirst, all others are appended to the m_aRest list, which
// receives a newly allocated SvAddressEntry_Impl.
if (pParser->m_bHasFirst)
pParser->m_aRest.Insert(new SvAddressEntry_Impl(
aTheAddrSpec,
aTheRealName),
LIST_APPEND);
else
{
pParser->m_bHasFirst = true;
pParser->m_aFirst.m_aAddrSpec = aTheAddrSpec;
pParser->m_aFirst.m_aRealName = aTheRealName;
}
}
// The synthetic ',' at the end of the input ends the parse; any other
// separator starts the next entry with a clean state.
if (bDone)
return;
reset();
}
break;
case ':':
switch (m_eState)
{
case BEFORE_COLON:
// Everything before the first ':' is discarded (presumably the name of
// an RFC 822 group -- TODO confirm).
m_aOuterAddrSpec.reset();
resetRealNameAndFirstComment();
m_eState = BEFORE_LESS;
break;
case BEFORE_LESS:
case AFTER_GREATER:
m_aOuterAddrSpec.finish();
addTokenToRealName();
break;
case AFTER_LESS:
m_aInnerAddrSpec.reset();
break;
}
break;
case '"':
// Let the next readToken() call scan the rest of the quoted string.
m_eType = TOKEN_QUOTED;
break;
case '.':
if (m_pAddrSpec->m_eLastElem != ELEMENT_END)
{
// A '.' is added as a delimiter unless the previous element already was
// a delimiter, in which case the candidate is discarded.
if (m_pAddrSpec->m_eLastElem != ELEMENT_DELIM)
addTokenToAddrSpec(ELEMENT_DELIM);
else
m_pAddrSpec->reset();
}
addTokenToRealName();
break;
case '[':
// Let the next readToken() call scan the rest of the domain literal.
m_eType = TOKEN_DOMAIN;
break;
}
}
}
//============================================================================
//
// SvAddressParser
//
//============================================================================
// Parses rInput.  The parsing itself is done by the constructor of a
// short-lived SvAddressParser_Impl, which stores the entries it finds in
// this object.
SvAddressParser::SvAddressParser(UniString const & rInput):
    m_bHasFirst(false)
{
    SvAddressParser_Impl aParseRun(this, rInput);
}
//============================================================================
// Deletes the entries held in m_aRest, removing them from the list from back
// to front.
SvAddressParser::~SvAddressParser()
{
    sal_uLong nRemaining = m_aRest.Count();
    while (nRemaining != 0)
    {
        --nRemaining;
        delete m_aRest.Remove(nRemaining);
    }
}
//============================================================================
// static
// Builds a mailbox string from a phrase (display name) and an addr-spec.
//
// rAddrSpec has to consist of a local part (words separated by '.'), an '@'
// and a domain (sub-domains separated by '.'); the INetMIME helper
// skipLinearWhiteSpaceComment() is applied between these elements
// (presumably skipping white space and comments -- see INetMIME), and what
// it skips is not copied.  If rPhrase is empty, the mailbox is the
// normalized addr-spec; otherwise it is the phrase (put into quotes if it
// contains any character for which INetMIME::isAtomChar() is false) followed
// by " <addr-spec>".
//
// @param rPhrase    the phrase; may be empty
// @param rAddrSpec  the addr-spec to normalize
// @param rMailbox   receives the result; only written on success
// @return true on success, false if rAddrSpec is not of the form described
//         above
bool SvAddressParser::createRFC822Mailbox(String const & rPhrase,
String const & rAddrSpec,
String & rMailbox)
{
String aTheAddrSpec;
sal_Unicode const * p = rAddrSpec.GetBuffer();
sal_Unicode const * pEnd = p + rAddrSpec.Len();
// Local part: one or more words (quoted strings or atoms) separated by '.',
// ended by '@'.  bSegment is false only for the first word.
{for (bool bSegment = false;;)
{
p = INetMIME::skipLinearWhiteSpaceComment(p, pEnd);
if (p == pEnd)
return false;
if (bSegment)
{
// After a word, either the '@' ends the local part or a '.' introduces
// the next word.
sal_Unicode c = *p++;
if (c == '@')
break;
else if (c != '.')
return false;
aTheAddrSpec += '.';
p = INetMIME::skipLinearWhiteSpaceComment(p, pEnd);
if (p == pEnd)
return false;
}
else
bSegment = true;
if (*p == '"')
{
// Quoted string: copied up to the closing quote.
aTheAddrSpec += *p++;
for (;;)
{
if (INetMIME::startsWithLineFolding(p, pEnd))
p += 2;
if (p == pEnd)
return false;
if (*p == '"')
break;
// Reject a bare CR, a backslash at the very end of the input and any
// non-US-ASCII character.  Note that the backslash test advances p to
// the escaped character as a side effect.
if (*p == '\x0D' || (*p == '\\' && ++p == pEnd)
|| !INetMIME::isUSASCII(*p))
return false;
// Escaping is regenerated: a backslash is written exactly for those
// characters for which needsQuotedStringEscape() is true.
if (INetMIME::needsQuotedStringEscape(*p))
aTheAddrSpec += '\\';
aTheAddrSpec += *p++;
}
// copy the closing quote
aTheAddrSpec += *p++;
}
else if (INetMIME::isAtomChar(*p))
while (p != pEnd && INetMIME::isAtomChar(*p))
aTheAddrSpec += *p++;
else
return false;
}}
aTheAddrSpec += '@';
// Domain: one or more sub-domains (domain literals or atoms) separated by
// '.', extending to the end of the input.
{for (bool bSegment = false;;)
{
p = INetMIME::skipLinearWhiteSpaceComment(p, pEnd);
if (p == pEnd)
{
// The end of the input is fine after at least one sub-domain.
if (bSegment)
break;
else
return false;
}
if (bSegment)
{
if (*p++ != '.')
return false;
aTheAddrSpec += '.';
p = INetMIME::skipLinearWhiteSpaceComment(p, pEnd);
if (p == pEnd)
return false;
}
else
bSegment = true;
if (*p == '[')
{
// Domain literal: copied up to the closing bracket.
aTheAddrSpec += *p++;
for (;;)
{
if (INetMIME::startsWithLineFolding(p, pEnd))
p += 2;
if (p == pEnd)
return false;
if (*p == ']')
break;
// Reject a bare CR, an unescaped '[', a backslash at the very end of the
// input and any non-US-ASCII character.  As above, the backslash test
// advances p to the escaped character.
if (*p == '\x0D' || *p == '[' || (*p == '\\' && ++p == pEnd)
|| !INetMIME::isUSASCII(*p))
return false;
// Characters in the range '[' to ']' (i.e. '[', '\\' and ']') are
// written with a backslash.
if (*p >= '[' && *p <= ']')
aTheAddrSpec += '\\';
aTheAddrSpec += *p++;
}
// copy the closing bracket
aTheAddrSpec += *p++;
}
else if (INetMIME::isAtomChar(*p))
while (p != pEnd && INetMIME::isAtomChar(*p))
aTheAddrSpec += *p++;
else
return false;
}}
if (rPhrase.Len() == 0)
rMailbox = aTheAddrSpec;
else
{
// The phrase has to be quoted as soon as it contains a single character
// that is not an atom character.
bool bQuotedString = false;
p = rPhrase.GetBuffer();
pEnd = p + rPhrase.Len();
for (;p != pEnd; ++p)
if (!(INetMIME::isAtomChar(*p)))
{
bQuotedString = true;
break;
}
String aTheMailbox;
if (bQuotedString)
{
aTheMailbox = '"';
for (p = rPhrase.GetBuffer(); p != pEnd; ++p)
{
if (INetMIME::needsQuotedStringEscape(*p))
aTheMailbox += '\\';
aTheMailbox += *p;
}
aTheMailbox += '"';
}
else
aTheMailbox = rPhrase;
aTheMailbox.AppendAscii(RTL_CONSTASCII_STRINGPARAM(" <"));
aTheMailbox += aTheAddrSpec;
aTheMailbox += '>';
rMailbox = aTheMailbox;
}
return true;
}