Improve UTF-8 handling in cmislib to close #CMIS-996

git-svn-id: https://svn.apache.org/repos/asf/chemistry/cmislib/trunk@1776460 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/src/cmislib/atompub/binding.py b/src/cmislib/atompub/binding.py
index f1a629f..8da6145 100644
--- a/src/cmislib/atompub/binding.py
+++ b/src/cmislib/atompub/binding.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 #
 #      Licensed to the Apache Software Foundation (ASF) under one
 #      or more contributor license agreements.  See the NOTICE file
@@ -27,7 +28,7 @@
 from cmislib.exceptions import CmisException, \
     ObjectNotFoundException, InvalidArgumentException, \
     NotSupportedException
-from cmislib.util import multiple_replace, parsePropValue, parseBoolValue, toCMISValue, parseDateTimeValue
+from cmislib.util import multiple_replace, parsePropValue, parseBoolValue, toCMISValue, parseDateTimeValue, safe_quote
 
 from urllib import quote
 from urlparse import urlparse, urlunparse
@@ -1540,7 +1541,7 @@
         template = self.getUriTemplates()['objectbypath']['template']
 
         # fill in the template with the path provided
-        params = {'{path}': quote(path, '/'),
+        params = {'{path}': safe_quote(path),
                   '{filter}': '',
                   '{includeAllowableActions}': 'false',
                   '{includePolicyIds}': 'false',
diff --git a/src/cmislib/browser/binding.py b/src/cmislib/browser/binding.py
index 7d7d758..a9e010b 100644
--- a/src/cmislib/browser/binding.py
+++ b/src/cmislib/browser/binding.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 #
 #      Licensed to the Apache Software Foundation (ASF) under one
 #      or more contributor license agreements.  See the NOTICE file
@@ -25,7 +26,7 @@
 from cmislib.exceptions import CmisException, InvalidArgumentException,\
                                NotSupportedException, ObjectNotFoundException
 from cmislib.net import RESTService as Rest
-from cmislib.util import parsePropValueByType, parseDateTimeValue
+from cmislib.util import parsePropValueByType, parseDateTimeValue, safe_quote, safe_urlencode
 import json
 import logging
 import StringIO
@@ -715,7 +716,7 @@
          - includeAllowableActions
         """
 
-        byPathUrl = self.getRootFolderUrl() + quote(path) + "?cmisselector=object"
+        byPathUrl = self.getRootFolderUrl() + safe_quote(path) + "?cmisselector=object"
         result = self._cmisClient.binding.get(byPathUrl.encode('utf-8'),
                                               self._cmisClient.username,
                                               self._cmisClient.password,
@@ -926,7 +927,7 @@
         typesUrl = self.getRepositoryUrl() + "?cmisselector=typeChildren"
 
         if typeId is not None:
-            typesUrl += "&typeId=%s" % (quote(typeId))
+            typesUrl += "&typeId=%s" % (safe_quote(typeId))
 
         result = self._cmisClient.binding.get(typesUrl,
                                               self._cmisClient.username,
@@ -986,7 +987,7 @@
         typesUrl = self.getRepositoryUrl() + "?cmisselector=typeDescendants"
 
         if typeId is not None:
-            typesUrl += "&typeId=%s" % (quote(typeId))
+            typesUrl += "&typeId=%s" % (safe_quote(typeId))
         if depth is not None:
             typesUrl += "&depth=%s" % (depth)
         print typesUrl
@@ -1180,7 +1181,7 @@
         """
 
         # build the CMIS query XML that we're going to POST
-        queryUrl = self.getRepositoryUrl() + "?cmisaction=query&q=" + quote(statement)
+        queryUrl = self.getRepositoryUrl() + "?cmisaction=query&q=" + safe_quote(statement)
 
         # do the POST
         result = self._cmisClient.binding.post(queryUrl.encode('utf-8'),
@@ -3161,27 +3162,6 @@
     return content_type, body
 
 
-def safe_urlencode(in_dict):
-
-    """
-    Safe encoding of values taking care of unicode values
-    urllib.urlencode doesn't like unicode values
-    """
-
-    def encoded_dict(in_dict):
-        out_dict = {}
-        for k, v in in_dict.iteritems():
-            if isinstance(v, unicode):
-                v = v.encode('utf8')
-            elif isinstance(v, str):
-                # Must be encoded in UTF-8
-                v.decode('utf8')
-            out_dict[k] = v
-        return out_dict
-
-    return urlencode(encoded_dict(in_dict))
-
-
 class ResultsSerializer(object):
 
     """
diff --git a/src/cmislib/cmis_services.py b/src/cmislib/cmis_services.py
index 8c59cdb..4659d02 100644
--- a/src/cmislib/cmis_services.py
+++ b/src/cmislib/cmis_services.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 #
 #      Licensed to the Apache Software Foundation (ASF) under one
 #      or more contributor license agreements.  See the NOTICE file
diff --git a/src/cmislib/domain.py b/src/cmislib/domain.py
index a2f7a25..a362339 100644
--- a/src/cmislib/domain.py
+++ b/src/cmislib/domain.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 #
 #      Licensed to the Apache Software Foundation (ASF) under one
 #      or more contributor license agreements.  See the NOTICE file
diff --git a/src/cmislib/exceptions.py b/src/cmislib/exceptions.py
index 207280d..58d601d 100644
--- a/src/cmislib/exceptions.py
+++ b/src/cmislib/exceptions.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 #
 #      Licensed to the Apache Software Foundation (ASF) under one
 #      or more contributor license agreements.  See the NOTICE file
diff --git a/src/cmislib/messages.py b/src/cmislib/messages.py
index 2df8e45..0d49756 100644
--- a/src/cmislib/messages.py
+++ b/src/cmislib/messages.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 #
 #      Licensed to the Apache Software Foundation (ASF) under one
 #      or more contributor license agreements.  See the NOTICE file
diff --git a/src/cmislib/model.py b/src/cmislib/model.py
index 0a04c09..03ede82 100644
--- a/src/cmislib/model.py
+++ b/src/cmislib/model.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 #
 #      Licensed to the Apache Software Foundation (ASF) under one
 #      or more contributor license agreements.  See the NOTICE file
diff --git a/src/cmislib/net.py b/src/cmislib/net.py
index 3984918..e5124a9 100644
--- a/src/cmislib/net.py
+++ b/src/cmislib/net.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 #
 #      Licensed to the Apache Software Foundation (ASF) under one
 #      or more contributor license agreements.  See the NOTICE file
diff --git a/src/cmislib/util.py b/src/cmislib/util.py
index 7b6855f..4455271 100644
--- a/src/cmislib/util.py
+++ b/src/cmislib/util.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 #
 #      Licensed to the Apache Software Foundation (ASF) under one
 #      or more contributor license agreements.  See the NOTICE file
@@ -24,10 +25,46 @@
 import logging
 import datetime
 from cmislib.domain import CmisId
+from urllib import quote
 
 moduleLogger = logging.getLogger('cmislib.util')
 
 
+def to_utf8(value):
+
+    """ Return value encoded to UTF-8 if it is unicode, else value unchanged
+    """
+    if isinstance(value, unicode):
+        value = value.encode('utf8')
+    return value
+
+
+def safe_urlencode(in_dict):
+
+    """
+    URL-encode in_dict after converting unicode values to UTF-8 byte
+    strings, because urllib.urlencode doesn't like unicode values.
+    Keys are passed through unchanged; returns the encoded query string.
+    """
+
+    # Imported locally: the module-level import added with this function
+    # only brings in `quote`, so a bare `urlencode` could raise NameError.
+    from urllib import urlencode
+
+    encoded = dict((k, to_utf8(v)) for k, v in in_dict.iteritems())
+    return urlencode(encoded)
+
+
+def safe_quote(value):
+
+    """
+    Percent-encode value with urllib.quote after converting unicode to a
+    UTF-8 byte string, because urllib.quote doesn't like unicode values
+    """
+
+    return quote(to_utf8(value))
+
+
 def multiple_replace(aDict, text):
 
     """
diff --git a/src/tests/cmislibtest.py b/src/tests/cmislibtest.py
index a81be56..70f9415 100644
--- a/src/tests/cmislibtest.py
+++ b/src/tests/cmislibtest.py
@@ -346,33 +346,34 @@
     def testGetObjectByPath(self):
         """Create test objects (one folder, one document) then try to get
         them by path"""
-        # names of folders and test docs
-        parentFolderName = 'testGetObjectByPath folder'
-        subFolderName = 'subfolder'
-        docName = 'testdoc'
+        # names of folders and test docs (plain, then with escaped non-ASCII chars)
+        for suffix in ['', u'_\xe9\xe0\u20ac$']:
+            parentFolderName = 'testGetObjectByPath folder' + suffix
+            subFolderName = 'subfolder' + suffix
+            docName = 'testdoc' + suffix
 
-        # create the folder structure
-        parentFolder = self._testFolder.createFolder(parentFolderName)
-        subFolder = parentFolder.createFolder(subFolderName)
-        # use the subfolder path to get the folder by path
-        subFolderPath = subFolder.getProperties().get("cmis:path")
-        searchFolder = self._repo.getObjectByPath(subFolderPath)
-        self.assertEquals(subFolder.getObjectId(), searchFolder.getObjectId())
+            # create the folder structure
+            parentFolder = self._testFolder.createFolder(parentFolderName)
+            subFolder = parentFolder.createFolder(subFolderName)
+            # use the subfolder path to get the folder by path
+            subFolderPath = subFolder.getProperties().get("cmis:path")
+            searchFolder = self._repo.getObjectByPath(subFolderPath)
+            self.assertEquals(subFolder.getObjectId(), searchFolder.getObjectId())
 
-        # create a test doc
-        doc = subFolder.createDocument(docName)
-        # ask the doc for its paths
-        searchDocPaths = doc.getPaths()
-        # for each path in the list, try to get the object by path
-        # this is better than building a path with the doc's name b/c the name
-        # isn't guaranteed to be used as the path segment (see CMIS-232)
-        for path in searchDocPaths:
-            searchDoc = self._repo.getObjectByPath(path)
-            self.assertEquals(doc.getObjectId(), searchDoc.getObjectId())
+            # create a test doc
+            doc = subFolder.createDocument(docName)
+            # ask the doc for its paths
+            searchDocPaths = doc.getPaths()
+            # for each path in the list, try to get the object by path
+            # this is better than building a path with the doc's name b/c the name
+            # isn't guaranteed to be used as the path segment (see CMIS-232)
+            for path in searchDocPaths:
+                searchDoc = self._repo.getObjectByPath(path)
+                self.assertEquals(doc.getObjectId(), searchDoc.getObjectId())
 
-        # get the subfolder by path, then ask for its children
-        subFolder = self._repo.getObjectByPath(subFolderPath)
-        self.assertEquals(len(subFolder.getChildren().getResults()), 1)
+            # get the subfolder by path, then ask for its children
+            subFolder = self._repo.getObjectByPath(subFolderPath)
+            self.assertEquals(len(subFolder.getChildren().getResults()), 1)
 
     # getting unfiled documents may work for the atom pub binding for some servers
     # but it isn't part of the spec so removing this test for now