| /****************************************************************************** |
| |
| Copyright (c) 2009-2010, Terry Caton |
| All rights reserved. |
| |
| Redistribution and use in source and binary forms, with or without |
| modification, are permitted provided that the following conditions are met: |
| * Redistributions of source code must retain the above copyright |
| notice, this list of conditions and the following disclaimer. |
| * Redistributions in binary form must reproduce the above copyright |
| notice, this list of conditions and the following disclaimer in the |
| documentation and/or other materials provided with the distribution. |
| * Neither the name of the project nor the names of its contributors |
| may be used to endorse or promote products derived from this software |
| without specific prior written permission. |
| |
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
| ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
| WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR |
| ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
| (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON |
| ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
| SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| |
| ******************************************************************************/ |
| |
| #include <cassert> |
| #include <set> |
| #include <sstream> |
| #include <ctype.h> |
| #include <vector> |
| #include "utf8.h" |
| |
| /* |
| |
| TODO: |
| * better documentation |
| * unicode character decoding |
| |
| */ |
| |
| namespace json |
| { |
| |
| inline std::istream& operator >> (std::istream& istr, UnknownElement& elementRoot) { |
| Reader::Read(elementRoot, istr); |
| return istr; |
| } |
| |
| inline Reader::Location::Location() : |
| m_nLine(0), |
| m_nLineOffset(0), |
| m_nDocOffset(0) |
| {} |
| |
| |
| ////////////////////// |
| // Reader::InputStream |
| |
| class Reader::InputStream // would be cool if we could inherit from std::istream & override "get" |
| { |
| public: |
| InputStream(std::istream& iStr) : |
| m_iStr(iStr) {} |
| |
| // protect access to the input stream, so we can keeep track of document/line offsets |
| char Get(); // big, define outside |
| char Peek() { |
| assert(m_iStr.eof() == false); // enforce reading of only valid stream data |
| return m_iStr.peek(); |
| } |
| |
| bool EOS() { |
| m_iStr.peek(); // apparently eof flag isn't set until a character read is attempted. whatever. |
| return m_iStr.eof(); |
| } |
| |
| const Location& GetLocation() const { return m_Location; } |
| |
| std::istream& GetStream() { |
| return m_iStr; |
| } |
| |
| private: |
| std::istream& m_iStr; |
| Location m_Location; |
| }; |
| |
| |
| inline char Reader::InputStream::Get() |
| { |
| assert(m_iStr.eof() == false); // enforce reading of only valid stream data |
| char c = m_iStr.get(); |
| |
| ++m_Location.m_nDocOffset; |
| if (c == '\n') { |
| ++m_Location.m_nLine; |
| m_Location.m_nLineOffset = 0; |
| } |
| else { |
| ++m_Location.m_nLineOffset; |
| } |
| |
| return c; |
| } |
| |
| |
| |
| ////////////////////// |
| // Reader::TokenStream |
| |
| class Reader::TokenStream |
| { |
| public: |
| TokenStream(const Tokens& tokens); |
| |
| const Token& Peek(); |
| const Token& Get(); |
| |
| bool EOS() const; |
| |
| private: |
| const Tokens& m_Tokens; |
| Tokens::const_iterator m_itCurrent; |
| }; |
| |
| |
| inline Reader::TokenStream::TokenStream(const Tokens& tokens) : |
| m_Tokens(tokens), |
| m_itCurrent(tokens.begin()) |
| {} |
| |
| inline const Reader::Token& Reader::TokenStream::Peek() { |
| if (EOS()) |
| { |
| const Token& lastToken = *m_Tokens.rbegin(); |
| std::string sMessage = "Unexpected end of token stream"; |
| throw ParseException(sMessage, lastToken.locBegin, lastToken.locEnd); // nowhere to point to |
| } |
| return *(m_itCurrent); |
| } |
| |
| inline const Reader::Token& Reader::TokenStream::Get() { |
| const Token& token = Peek(); |
| ++m_itCurrent; |
| return token; |
| } |
| |
| inline bool Reader::TokenStream::EOS() const { |
| return m_itCurrent == m_Tokens.end(); |
| } |
| |
| /////////////////// |
| // Reader (finally) |
| |
| |
| inline void Reader::Read(Object& object, std::istream& istr) { Read_i(object, istr); } |
| inline void Reader::Read(Array& array, std::istream& istr) { Read_i(array, istr); } |
| inline void Reader::Read(String& string, std::istream& istr) { Read_i(string, istr); } |
| inline void Reader::Read(Number& number, std::istream& istr) { Read_i(number, istr); } |
| inline void Reader::Read(Boolean& boolean, std::istream& istr) { Read_i(boolean, istr); } |
| inline void Reader::Read(Null& null, std::istream& istr) { Read_i(null, istr); } |
| inline void Reader::Read(UnknownElement& unknown, std::istream& istr) { Read_i(unknown, istr); } |
| |
| |
| template <typename ElementTypeT> |
| void Reader::Read_i(ElementTypeT& element, std::istream& istr) |
| { |
| Reader reader; |
| |
| Tokens tokens; |
| InputStream inputStream(istr); |
| reader.Scan(tokens, inputStream); |
| |
| TokenStream tokenStream(tokens); |
| reader.Parse(element, tokenStream); |
| |
| if (tokenStream.EOS() == false) |
| { |
| const Token& token = tokenStream.Peek(); |
| std::string sMessage = std::string("Expected End of token stream; found ") + token.sValue; |
| throw ParseException(sMessage, token.locBegin, token.locEnd); |
| } |
| } |
| |
| |
| inline void Reader::Scan(Tokens& tokens, InputStream& inputStream) |
| { |
| while (EatWhiteSpace(inputStream), // ignore any leading white space... |
| inputStream.EOS() == false) // ...before checking for EOS |
| { |
| // if all goes well, we'll create a token each pass |
| Token token; |
| token.locBegin = inputStream.GetLocation(); |
| |
| // gives us null-terminated string |
| char sChar = inputStream.Peek(); |
| switch (sChar) |
| { |
| case '{': |
| token.sValue = MatchExpectedString(inputStream, "{"); |
| token.nType = Token::TOKEN_OBJECT_BEGIN; |
| break; |
| |
| case '}': |
| token.sValue = MatchExpectedString(inputStream, "}"); |
| token.nType = Token::TOKEN_OBJECT_END; |
| break; |
| |
| case '[': |
| token.sValue = MatchExpectedString(inputStream, "["); |
| token.nType = Token::TOKEN_ARRAY_BEGIN; |
| break; |
| |
| case ']': |
| token.sValue = MatchExpectedString(inputStream, "]"); |
| token.nType = Token::TOKEN_ARRAY_END; |
| break; |
| |
| case ',': |
| token.sValue = MatchExpectedString(inputStream, ","); |
| token.nType = Token::TOKEN_NEXT_ELEMENT; |
| break; |
| |
| case ':': |
| token.sValue = MatchExpectedString(inputStream, ":"); |
| token.nType = Token::TOKEN_MEMBER_ASSIGN; |
| break; |
| |
| case '"': |
| token.sValue = MatchString(inputStream); |
| token.nType = Token::TOKEN_STRING; |
| break; |
| |
| case '-': |
| case '0': |
| case '1': |
| case '2': |
| case '3': |
| case '4': |
| case '5': |
| case '6': |
| case '7': |
| case '8': |
| case '9': |
| token.sValue = MatchNumber(inputStream); |
| token.nType = Token::TOKEN_NUMBER; |
| break; |
| |
| case 't': |
| token.sValue = MatchExpectedString(inputStream, "true"); |
| token.nType = Token::TOKEN_BOOLEAN; |
| break; |
| |
| case 'f': |
| token.sValue = MatchExpectedString(inputStream, "false"); |
| token.nType = Token::TOKEN_BOOLEAN; |
| break; |
| |
| case 'n': |
| token.sValue = MatchExpectedString(inputStream, "null"); |
| token.nType = Token::TOKEN_NULL; |
| break; |
| |
| default: |
| { |
| std::string sErrorMessage = std::string("Unexpected character in stream: ") + sChar; |
| throw ScanException(sErrorMessage, inputStream.GetLocation()); |
| } |
| } |
| |
| token.locEnd = inputStream.GetLocation(); |
| tokens.push_back(token); |
| } |
| } |
| |
| |
| inline void Reader::EatWhiteSpace(InputStream& inputStream) |
| { |
| while (inputStream.EOS() == false && |
| ::isspace(inputStream.Peek())) |
| inputStream.Get(); |
| } |
| |
| inline std::string Reader::MatchExpectedString(InputStream& inputStream, const std::string& sExpected) |
| { |
| std::string::const_iterator it(sExpected.begin()), |
| itEnd(sExpected.end()); |
| for ( ; it != itEnd; ++it) { |
| if (inputStream.EOS() || // did we reach the end before finding what we're looking for... |
| inputStream.Get() != *it) // ...or did we find something different? |
| { |
| std::string sMessage = std::string("Expected string: ") + sExpected; |
| throw ScanException(sMessage, inputStream.GetLocation()); |
| } |
| } |
| |
| // all's well if we made it here |
| return sExpected; |
| } |
| |
| static void ConsumeUnicode(std::istream& str, std::vector<utf8::uint16_t>& uni16) { |
| char bytes[4] = { '\0' }; |
| str.get(bytes[0]); |
| str.get(bytes[1]); |
| str.get(bytes[2]); |
| str.get(bytes[3]); |
| utf8::uint16_t cp = (bytes[0] - '0') << 12 | (bytes[1] - '0') << 8 | (bytes[2] - '0') << 4 | (bytes[3] - '0'); |
| uni16.push_back(cp); |
| } |
| |
| static inline void FinalizeUnicode(std::string& uni8, std::vector<utf8::uint16_t>& uni16) { |
| utf8::utf16to8(uni16.begin(), uni16.end(), std::back_inserter(uni8)); |
| uni16.clear(); |
| } |
| |
| inline std::string Reader::MatchString(InputStream& inputStream) |
| { |
| MatchExpectedString(inputStream, "\""); |
| |
| std::string string; |
| std::vector<utf8::uint16_t> uni; |
| while (inputStream.EOS() == false && |
| inputStream.Peek() != '"') |
| { |
| char c = inputStream.Get(); |
| |
| // escape? |
| if (c == '\\' && |
| inputStream.EOS() == false) // shouldn't have reached the end yet |
| { |
| c = inputStream.Get(); |
| switch (c) { |
| case '/': string.push_back('/'); FinalizeUnicode(string, uni); break; |
| case '"': string.push_back('"'); FinalizeUnicode(string, uni); break; |
| case '\\': string.append("\\\\"); FinalizeUnicode(string, uni); break; |
| case 'b': string.push_back('\b'); FinalizeUnicode(string, uni); break; |
| case 'f': string.push_back('\f'); FinalizeUnicode(string, uni); break; |
| case 'n': string.push_back('\n'); FinalizeUnicode(string, uni); break; |
| case 'r': string.push_back('\r'); FinalizeUnicode(string, uni); break; |
| case 't': string.push_back('\t'); FinalizeUnicode(string, uni); break; |
| case 'u': ConsumeUnicode(inputStream.GetStream(), uni); break; |
| default: { |
| std::string sMessage = std::string("Unrecognized escape sequence found in string: \\") + c; |
| throw ScanException(sMessage, inputStream.GetLocation()); |
| } |
| } |
| } |
| else { |
| FinalizeUnicode(string, uni); |
| string.push_back(c); |
| } |
| } |
| |
| if (!uni.empty()) FinalizeUnicode(string, uni); |
| |
| // eat the last '"' that we just peeked |
| MatchExpectedString(inputStream, "\""); |
| |
| // all's well if we made it here |
| return string; |
| } |
| |
| |
| inline std::string Reader::MatchNumber(InputStream& inputStream) |
| { |
| const char sNumericChars[] = "0123456789.eE-+"; |
| std::set<char> numericChars; |
| numericChars.insert(sNumericChars, sNumericChars + sizeof(sNumericChars)); |
| |
| std::string sNumber; |
| while (inputStream.EOS() == false && |
| numericChars.find(inputStream.Peek()) != numericChars.end()) |
| { |
| sNumber.push_back(inputStream.Get()); |
| } |
| |
| return sNumber; |
| } |
| |
| |
| inline void Reader::Parse(UnknownElement& element, Reader::TokenStream& tokenStream) |
| { |
| const Token& token = tokenStream.Peek(); |
| switch (token.nType) { |
| case Token::TOKEN_OBJECT_BEGIN: |
| { |
| // implicit non-const cast will perform conversion for us (if necessary) |
| Object& object = element; |
| Parse(object, tokenStream); |
| break; |
| } |
| |
| case Token::TOKEN_ARRAY_BEGIN: |
| { |
| Array& array = element; |
| Parse(array, tokenStream); |
| break; |
| } |
| |
| case Token::TOKEN_STRING: |
| { |
| String& string = element; |
| Parse(string, tokenStream); |
| break; |
| } |
| |
| case Token::TOKEN_NUMBER: |
| { |
| Number& number = element; |
| Parse(number, tokenStream); |
| break; |
| } |
| |
| case Token::TOKEN_BOOLEAN: |
| { |
| Boolean& boolean = element; |
| Parse(boolean, tokenStream); |
| break; |
| } |
| |
| case Token::TOKEN_NULL: |
| { |
| Null& null = element; |
| Parse(null, tokenStream); |
| break; |
| } |
| |
| default: |
| { |
| std::string sMessage = std::string("Unexpected token: ") + token.sValue; |
| throw ParseException(sMessage, token.locBegin, token.locEnd); |
| } |
| } |
| } |
| |
| |
| inline void Reader::Parse(Object& object, Reader::TokenStream& tokenStream) |
| { |
| MatchExpectedToken(Token::TOKEN_OBJECT_BEGIN, tokenStream); |
| |
| bool bContinue = (tokenStream.EOS() == false && |
| tokenStream.Peek().nType != Token::TOKEN_OBJECT_END); |
| while (bContinue) |
| { |
| Object::Member member; |
| |
| // first the member name. save the token in case we have to throw an exception |
| const Token& tokenName = tokenStream.Peek(); |
| member.name = MatchExpectedToken(Token::TOKEN_STRING, tokenStream); |
| |
| // ...then the key/value separator... |
| MatchExpectedToken(Token::TOKEN_MEMBER_ASSIGN, tokenStream); |
| |
| // ...then the value itself (can be anything). |
| Parse(member.element, tokenStream); |
| |
| // try adding it to the object (this could throw) |
| try |
| { |
| object.Insert(member); |
| } |
| catch (Exception&) |
| { |
| // must be a duplicate name |
| std::string sMessage = std::string("Duplicate object member token: ") + member.name; |
| throw ParseException(sMessage, tokenName.locBegin, tokenName.locEnd); |
| } |
| |
| bContinue = (tokenStream.EOS() == false && |
| tokenStream.Peek().nType == Token::TOKEN_NEXT_ELEMENT); |
| if (bContinue) |
| MatchExpectedToken(Token::TOKEN_NEXT_ELEMENT, tokenStream); |
| } |
| |
| MatchExpectedToken(Token::TOKEN_OBJECT_END, tokenStream); |
| } |
| |
| |
| inline void Reader::Parse(Array& array, Reader::TokenStream& tokenStream) |
| { |
| MatchExpectedToken(Token::TOKEN_ARRAY_BEGIN, tokenStream); |
| |
| bool bContinue = (tokenStream.EOS() == false && |
| tokenStream.Peek().nType != Token::TOKEN_ARRAY_END); |
| while (bContinue) |
| { |
| // ...what's next? could be anything |
| Array::iterator itElement = array.Insert(UnknownElement()); |
| UnknownElement& element = *itElement; |
| Parse(element, tokenStream); |
| |
| bContinue = (tokenStream.EOS() == false && |
| tokenStream.Peek().nType == Token::TOKEN_NEXT_ELEMENT); |
| if (bContinue) |
| MatchExpectedToken(Token::TOKEN_NEXT_ELEMENT, tokenStream); |
| } |
| |
| MatchExpectedToken(Token::TOKEN_ARRAY_END, tokenStream); |
| } |
| |
| |
| inline void Reader::Parse(String& string, Reader::TokenStream& tokenStream) |
| { |
| string = MatchExpectedToken(Token::TOKEN_STRING, tokenStream); |
| } |
| |
| |
| inline void Reader::Parse(Number& number, Reader::TokenStream& tokenStream) |
| { |
| const Token& currentToken = tokenStream.Peek(); // might need this later for throwing exception |
| const std::string& sValue = MatchExpectedToken(Token::TOKEN_NUMBER, tokenStream); |
| |
| std::istringstream iStr(sValue); |
| double dValue; |
| iStr >> dValue; |
| |
| // did we consume all characters in the token? |
| if (iStr.eof() == false) |
| { |
| char c = iStr.peek(); |
| std::string sMessage = std::string("Unexpected character in NUMBER token: ") + c; |
| throw ParseException(sMessage, currentToken.locBegin, currentToken.locEnd); |
| } |
| |
| number = dValue; |
| } |
| |
| |
| inline void Reader::Parse(Boolean& boolean, Reader::TokenStream& tokenStream) |
| { |
| const std::string& sValue = MatchExpectedToken(Token::TOKEN_BOOLEAN, tokenStream); |
| boolean = (sValue == "true" ? true : false); |
| } |
| |
| |
| inline void Reader::Parse(Null&, Reader::TokenStream& tokenStream) |
| { |
| MatchExpectedToken(Token::TOKEN_NULL, tokenStream); |
| } |
| |
| |
| inline const std::string& Reader::MatchExpectedToken(Token::Type nExpected, Reader::TokenStream& tokenStream) |
| { |
| const Token& token = tokenStream.Get(); |
| if (token.nType != nExpected) |
| { |
| std::string sMessage = std::string("Unexpected token: ") + token.sValue; |
| throw ParseException(sMessage, token.locBegin, token.locEnd); |
| } |
| |
| return token.sValue; |
| } |
| |
| } // End namespace |