| /**************************************************************************** |
| * netutils/netlib/netlib_parseurl.c |
| * |
| * Copyright (C) 2019 Gregory Nutt. All rights reserved. |
| * Author: Sebastien Lorquet <sebastien@lorquet.fr> |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in |
| * the documentation and/or other materials provided with the |
| * distribution. |
| * 3. Neither the name NuttX nor the names of its contributors may be |
| * used to endorse or promote products derived from this software |
| * without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS |
| * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE |
| * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, |
| * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, |
| * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS |
| * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED |
| * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN |
| * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| * POSSIBILITY OF SUCH DAMAGE. |
| * |
| ****************************************************************************/ |
| |
| /**************************************************************************** |
| * Included Files |
| ****************************************************************************/ |
| |
| #include <nuttx/config.h> |
| |
| #include <stdint.h> |
| #include <string.h> |
| #include <errno.h> |
| |
| #include "netutils/netlib.h" |
| |
| /**************************************************************************** |
| * Public Functions |
| ****************************************************************************/ |
| |
| /**************************************************************************** |
| * Name: netlib_parseurl |
| * |
| * Description: |
| * Parse an URL, not only HTTP ones. The parsing is according to this rule: |
| * SCHEME :// HOST [: PORT] / PATH |
| * - scheme is everything before the first colon |
| * - scheme must be followed by :// |
| * - host is everything until colon or slash |
| * - port is optional, parsed only if host ends with colon |
| * - path is everything after the host. |
| * This is noticeably simpler that the official URL parsing method, since |
| * - it does not take into account the user:pass@ part that can be present |
| * before the host. Support of these fields is planned in the url_s |
| * structure, but it is not parsed yet/ |
| * - it does not separate the URL parameters nor the bookmark |
| * Note: see here for the documentation of a complete URL parsing routine: |
| * https://www.php.net/manual/fr/function.parse-url.php |
| * |
| ****************************************************************************/ |
| |
| int netlib_parseurl(FAR const char *str, FAR struct url_s *url) |
| { |
| FAR const char *src = str; |
| FAR char *dest; |
| int bytesleft; |
| int ret = OK; |
| |
| /* extract the protocol field, a set of a-z letters */ |
| |
| dest = url->scheme; |
| bytesleft = url->schemelen; |
| |
| while (*src != '\0' && *src != ':') |
| { |
| /* Make sure that there is space for another character in the |
| * scheme (reserving space for the null terminator). |
| */ |
| |
| if (bytesleft > 1) |
| { |
| /* Copy the byte */ |
| |
| *dest++ = *src++; |
| bytesleft--; |
| } |
| else |
| { |
| /* Note the error, but continue parsing until the end of the |
| * hostname |
| */ |
| |
| src++; |
| ret = -E2BIG; |
| } |
| } |
| |
| *dest = '\0'; |
| |
| /* Parse and skip the scheme separator */ |
| |
| if (*src != ':') |
| { |
| ret = -EINVAL; |
| } |
| |
| src++; |
| |
| if (*src != '/') |
| { |
| ret = -EINVAL; |
| } |
| |
| src++; |
| |
| if (*src != '/') |
| { |
| ret = -EINVAL; |
| } |
| |
| src++; |
| |
| /* Concatenate the hostname following http:// and up to the termnator */ |
| |
| dest = url->host; |
| bytesleft = url->hostlen; |
| |
| while (*src != '\0' && *src != '/' && *src != ' ' && *src != ':') |
| { |
| /* Make sure that there is space for another character in the |
| * hostname (reserving space for the null terminator). |
| */ |
| |
| if (bytesleft > 1) |
| { |
| /* Copy the byte */ |
| |
| *dest++ = *src++; |
| bytesleft--; |
| } |
| else |
| { |
| /* Note the error, but continue parsing until the end of the |
| * hostname |
| */ |
| |
| src++; |
| ret = -E2BIG; |
| } |
| } |
| |
| *dest = '\0'; |
| |
| /* Check if the hostname is following by a port number */ |
| |
| if (*src == ':') |
| { |
| uint16_t accum = 0; |
| src++; /* Skip over the colon */ |
| |
| while (*src >= '0' && *src <= '9') |
| { |
| accum = 10*accum + *src - '0'; |
| src++; |
| } |
| |
| url->port = accum; |
| } |
| |
| /* Make sure the file name starts with exactly one '/' */ |
| |
| dest = url->path; |
| bytesleft = url->pathlen; |
| |
| while (*src == '/') |
| { |
| src++; |
| } |
| |
| *dest++ = '/'; |
| bytesleft--; |
| |
| /* The copy the rest of the file name to the user buffer */ |
| |
| strncpy(dest, src, bytesleft); |
| url->path[bytesleft - 1] = '\0'; |
| return ret; |
| } |