tools/email_utils_patch.py - incubator-ponymail - Git at Google

 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 # Licensed to the Apache Software Foundation (ASF) under one or more
 # contributor license agreements.  See the NOTICE file distributed with
 # this work for additional information regarding copyright ownership.
 # The ASF licenses this file to You under the Apache License, Version 2.0
 # (the "License"); you may not use this file except in compliance with
 # the License.  You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.

 """
 Possible patch for Python email package
 The current code calls unquote() in collapse_rfc2231_value
 when it has already called unquote().
 Double-unquoting mangles raw values that happen to be enclosed in quotes.

 This was first discovered for multi-part boundaries, some of which look like:
 boundary="<<<abcd>>>"
 Strictly speaking < and > are not valid as boundary chars, but they are seen in the wild.
 A similar problem exists for filenames which start/end with "/" or </>
 These are valid (but unusual)

 One way to fix this is to replace the faulty version of collapse_rfc2231_value.

 To use:

 import email_utils_patch
 ...
 email_utils_patch.patch()

 """

 from email import utils

 # Copy of utils.collapse_rfc2231_value with unquote() calls removed
 def _collapse_rfc2231_value(value, errors='replace',
                            fallback_charset='us-ascii'):
     if not isinstance(value, tuple) or len(value) != 3:
         return value
     # While value comes to us as a unicode string, we need it to be a bytes
     # object.  We do not want bytes() normal utf-8 decoder, we want a straight
     # interpretation of the string as character bytes.
     charset, _language, text = value
     if charset is None:
         # Issue 17369: if charset/lang is None, decode_rfc2231 couldn't parse
         # the value, so use the fallback_charset.
         charset = fallback_charset
     rawbytes = bytes(text, 'raw-unicode-escape')
     try:
         return str(rawbytes, charset, errors)
     except LookupError:
         # charset is not a known codec.
         return text

 def patch():
     old = utils.collapse_rfc2231_value
     utils.collapse_rfc2231_value = _collapse_rfc2231_value
     print("Overiding broken collapse_rfc2231_value")
     return old
	#!/usr/bin/env python3
	# -- coding: utf-8 --
	# Licensed to the Apache Software Foundation (ASF) under one or more
	# contributor license agreements. See the NOTICE file distributed with
	# this work for additional information regarding copyright ownership.
	# The ASF licenses this file to You under the Apache License, Version 2.0
	# (the "License"); you may not use this file except in compliance with
	# the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	"""
	Possible patch for Python email package
	The current code calls unquote() in collapse_rfc2231_value
	when it has already called unquote().
	Double-unquoting mangles raw values that happen to be enclosed in quotes.

	This was first discovered for multi-part boundaries, some of which look like:
	boundary="<<<abcd>>>"
	Strictly speaking < and > are not valid as boundary chars, but they are seen in the wild.
	A similar problem exists for filenames which start/end with "/" or </>
	These are valid (but unusual)

	One way to fix this is to replace the faulty version of collapse_rfc2231_value.

	To use:

	import email_utils_patch
	...
	email_utils_patch.patch()

	"""

	from email import utils

	# Copy of utils.collapse_rfc2231_value with unquote() calls removed
	def _collapse_rfc2231_value(value, errors='replace',
	fallback_charset='us-ascii'):
	if not isinstance(value, tuple) or len(value) != 3:
	return value
	# While value comes to us as a unicode string, we need it to be a bytes
	# object. We do not want bytes() normal utf-8 decoder, we want a straight
	# interpretation of the string as character bytes.
	charset, _language, text = value
	if charset is None:
	# Issue 17369: if charset/lang is None, decode_rfc2231 couldn't parse
	# the value, so use the fallback_charset.
	charset = fallback_charset
	rawbytes = bytes(text, 'raw-unicode-escape')
	try:
	return str(rawbytes, charset, errors)
	except LookupError:
	# charset is not a known codec.
	return text

	def patch():
	old = utils.collapse_rfc2231_value
	utils.collapse_rfc2231_value = _collapse_rfc2231_value
	print("Overiding broken collapse_rfc2231_value")
	return old