/* netutils/netlib/netlib_parseurl.c - nuttx-apps - Git at Google */

 /****************************************************************************
  * netutils/netlib/netlib_parseurl.c
  *
  *   Copyright (C) 2019 Gregory Nutt. All rights reserved.
  *   Author: Sebastien Lorquet <sebastien@lorquet.fr>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in
  *    the documentation and/or other materials provided with the
  *    distribution.
  * 3. Neither the name NuttX nor the names of its contributors may be
  *    used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  *
  ****************************************************************************/

 /****************************************************************************
  * Included Files
  ****************************************************************************/

 #include <nuttx/config.h>

 #include <stdint.h>
 #include <string.h>
 #include <errno.h>

 #include "netutils/netlib.h"

 /****************************************************************************
  * Public Functions
  ****************************************************************************/

 /****************************************************************************
  * Name: netlib_parseurl
  *
  * Description:
  *   Parse a URL, not only HTTP ones. The parsing is according to this rule:
  *
  *     SCHEME :// HOST [: PORT] / PATH
  *
  *   - scheme is everything before the first colon
  *   - scheme must be followed by ://
  *   - host is everything until a colon, a slash or a space
  *   - port is optional, parsed only if host ends with colon
  *   - path is everything after the host (and port); it is always stored
  *     with exactly one leading '/'.
  *
  *   This is noticeably simpler than the official URL parsing method, since
  *   - it does not take into account the user:pass@ part that can be present
  *     before the host. Support of these fields is planned in the url_s
  *     structure, but it is not parsed yet.
  *   - it does not separate the URL parameters nor the bookmark
  *
  *   Note: see here for the documentation of a complete URL parsing routine:
  *   https://www.php.net/manual/fr/function.parse-url.php
  *
  * Input Parameters:
  *   str - The NUL-terminated URL string to parse.
  *   url - Parse results.  On entry the caller provides the scheme, host and
  *         path buffers together with their sizes in schemelen, hostlen and
  *         pathlen.  On return the buffers hold NUL-terminated (possibly
  *         truncated) strings.  The port field is written only when the
  *         URL contains a ':' after the host; otherwise it is left
  *         untouched.
  *
  * Returned Value:
  *   OK on success.  -EINVAL if the scheme is not followed by "://".
  *   -E2BIG if the scheme, host or path did not fit in its buffer.  Parsing
  *   continues after an error, so the value returned is that of the last
  *   error detected.
  *
  ****************************************************************************/

 int netlib_parseurl(FAR const char *str, FAR struct url_s *url)
 {
   FAR const char *src = str;
   FAR const char *sep;
   FAR char *dest;
   int bytesleft;
   int ret = OK;

   /* Extract the scheme: everything up to the first colon */

   dest      = url->scheme;
   bytesleft = url->schemelen;

   while (*src != '\0' && *src != ':')
     {
       /* Copy the byte only while there is room for it plus the null
        * terminator.  Otherwise note the error but keep consuming input so
        * that the rest of the URL can still be parsed.
        */

       if (bytesleft > 1)
         {
           *dest++ = *src;
           bytesleft--;
         }
       else
         {
           ret = -E2BIG;
         }

       src++;
     }

   /* Terminate the scheme, unless the buffer cannot even hold the
    * terminator.
    */

   if (bytesleft > 0)
     {
       *dest = '\0';
     }
   else
     {
       ret = -E2BIG;
     }

   /* Parse and skip the "://" scheme separator.  A mismatching character is
    * still skipped (the parse carries on with -EINVAL noted), but the input
    * pointer is never moved beyond the terminating NUL of the input string.
    */

   for (sep = "://"; *sep != '\0'; sep++)
     {
       if (*src != *sep)
         {
           ret = -EINVAL;
         }

       if (*src != '\0')
         {
           src++;
         }
     }

   /* Copy the hostname that follows the separator, up to its terminator
    * (end of string, '/', ' ' or ':').
    */

   dest      = url->host;
   bytesleft = url->hostlen;

   while (*src != '\0' && *src != '/' && *src != ' ' && *src != ':')
     {
       /* Same policy as for the scheme: truncate, note the error and keep
        * going until the end of the hostname.
        */

       if (bytesleft > 1)
         {
           *dest++ = *src;
           bytesleft--;
         }
       else
         {
           ret = -E2BIG;
         }

       src++;
     }

   if (bytesleft > 0)
     {
       *dest = '\0';
     }
   else
     {
       ret = -E2BIG;
     }

   /* Check if the hostname is followed by a port number */

   if (*src == ':')
     {
       uint16_t accum = 0;

       src++; /* Skip over the colon */

       /* Accumulate decimal digits.  There is no range check: the value is
        * stored in a uint16_t, so numbers above 65535 wrap around.
        */

       while (*src >= '0' && *src <= '9')
         {
           accum = 10 * accum + *src - '0';
           src++;
         }

       url->port = accum;
     }

   /* Make sure the file name starts with exactly one '/' */

   dest      = url->path;
   bytesleft = url->pathlen;

   while (*src == '/')
     {
       src++;
     }

   /* The path needs room for at least "/" and the null terminator */

   if (bytesleft < 2)
     {
       if (bytesleft > 0)
         {
           *dest = '\0';
         }

       return -E2BIG;
     }

   *dest++ = '/';
   bytesleft--;

   /* Then copy the rest of the file name to the user buffer.  strncpy()
    * pads with NULs when the source is shorter than the space left, so a
    * non-NUL byte in the last position means that the path did not fit and
    * must be truncated and terminated by hand.
    */

   strncpy(dest, src, bytesleft);
   if (dest[bytesleft - 1] != '\0')
     {
       dest[bytesleft - 1] = '\0';
       ret = -E2BIG;
     }

   return ret;
 }
	/****************************************************************************
	* netutils/netlib/netlib_parseurl.c
	*
	* Copyright (C) 2019 Gregory Nutt. All rights reserved.
	* Author: Sebastien Lorquet <sebastien@lorquet.fr>
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions
	* are met:
	*
	* 1. Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* 2. Redistributions in binary form must reproduce the above copyright
	* notice, this list of conditions and the following disclaimer in
	* the documentation and/or other materials provided with the
	* distribution.
	* 3. Neither the name NuttX nor the names of its contributors may be
	* used to endorse or promote products derived from this software
	* without specific prior written permission.
	*
	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
	* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
	* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
	* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
	* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
	* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
	* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
	* POSSIBILITY OF SUCH DAMAGE.
	*
	****************************************************************************/

	/****************************************************************************
	* Included Files
	****************************************************************************/

	#include <nuttx/config.h>

	#include <stdint.h>
	#include <string.h>
	#include <errno.h>

	#include "netutils/netlib.h"

	/****************************************************************************
	* Public Functions
	****************************************************************************/

	/****************************************************************************
	 * Name: netlib_parseurl
	 *
	 * Description:
	 *   Parse a URL, not only HTTP ones. The parsing is according to this rule:
	 *
	 *     SCHEME :// HOST [: PORT] / PATH
	 *
	 *   - scheme is everything before the first colon
	 *   - scheme must be followed by ://
	 *   - host is everything until a colon, a slash or a space
	 *   - port is optional, parsed only if host ends with colon
	 *   - path is everything after the host (and port); it is always stored
	 *     with exactly one leading '/'.
	 *
	 *   This is noticeably simpler than the official URL parsing method, since
	 *   - it does not take into account the user:pass@ part that can be present
	 *     before the host. Support of these fields is planned in the url_s
	 *     structure, but it is not parsed yet.
	 *   - it does not separate the URL parameters nor the bookmark
	 *
	 *   Note: see here for the documentation of a complete URL parsing routine:
	 *   https://www.php.net/manual/fr/function.parse-url.php
	 *
	 * Input Parameters:
	 *   str - The NUL-terminated URL string to parse.
	 *   url - Parse results.  On entry the caller provides the scheme, host and
	 *         path buffers together with their sizes in schemelen, hostlen and
	 *         pathlen.  On return the buffers hold NUL-terminated (possibly
	 *         truncated) strings.  The port field is written only when the
	 *         URL contains a ':' after the host; otherwise it is left
	 *         untouched.
	 *
	 * Returned Value:
	 *   OK on success.  -EINVAL if the scheme is not followed by "://".
	 *   -E2BIG if the scheme, host or path did not fit in its buffer.  Parsing
	 *   continues after an error, so the value returned is that of the last
	 *   error detected.
	 *
	 ****************************************************************************/

	int netlib_parseurl(FAR const char *str, FAR struct url_s *url)
	{
	  FAR const char *src = str;
	  FAR const char *sep;
	  FAR char *dest;
	  int bytesleft;
	  int ret = OK;

	  /* Extract the scheme: everything up to the first colon */

	  dest      = url->scheme;
	  bytesleft = url->schemelen;

	  while (*src != '\0' && *src != ':')
	    {
	      /* Copy the byte only while there is room for it plus the null
	       * terminator.  Otherwise note the error but keep consuming input so
	       * that the rest of the URL can still be parsed.
	       */

	      if (bytesleft > 1)
	        {
	          *dest++ = *src;
	          bytesleft--;
	        }
	      else
	        {
	          ret = -E2BIG;
	        }

	      src++;
	    }

	  /* Terminate the scheme, unless the buffer cannot even hold the
	   * terminator.
	   */

	  if (bytesleft > 0)
	    {
	      *dest = '\0';
	    }
	  else
	    {
	      ret = -E2BIG;
	    }

	  /* Parse and skip the "://" scheme separator.  A mismatching character is
	   * still skipped (the parse carries on with -EINVAL noted), but the input
	   * pointer is never moved beyond the terminating NUL of the input string.
	   */

	  for (sep = "://"; *sep != '\0'; sep++)
	    {
	      if (*src != *sep)
	        {
	          ret = -EINVAL;
	        }

	      if (*src != '\0')
	        {
	          src++;
	        }
	    }

	  /* Copy the hostname that follows the separator, up to its terminator
	   * (end of string, '/', ' ' or ':').
	   */

	  dest      = url->host;
	  bytesleft = url->hostlen;

	  while (*src != '\0' && *src != '/' && *src != ' ' && *src != ':')
	    {
	      /* Same policy as for the scheme: truncate, note the error and keep
	       * going until the end of the hostname.
	       */

	      if (bytesleft > 1)
	        {
	          *dest++ = *src;
	          bytesleft--;
	        }
	      else
	        {
	          ret = -E2BIG;
	        }

	      src++;
	    }

	  if (bytesleft > 0)
	    {
	      *dest = '\0';
	    }
	  else
	    {
	      ret = -E2BIG;
	    }

	  /* Check if the hostname is followed by a port number */

	  if (*src == ':')
	    {
	      uint16_t accum = 0;

	      src++; /* Skip over the colon */

	      /* Accumulate decimal digits.  There is no range check: the value is
	       * stored in a uint16_t, so numbers above 65535 wrap around.
	       */

	      while (*src >= '0' && *src <= '9')
	        {
	          accum = 10 * accum + *src - '0';
	          src++;
	        }

	      url->port = accum;
	    }

	  /* Make sure the file name starts with exactly one '/' */

	  dest      = url->path;
	  bytesleft = url->pathlen;

	  while (*src == '/')
	    {
	      src++;
	    }

	  /* The path needs room for at least "/" and the null terminator */

	  if (bytesleft < 2)
	    {
	      if (bytesleft > 0)
	        {
	          *dest = '\0';
	        }

	      return -E2BIG;
	    }

	  *dest++ = '/';
	  bytesleft--;

	  /* Then copy the rest of the file name to the user buffer.  strncpy()
	   * pads with NULs when the source is shorter than the space left, so a
	   * non-NUL byte in the last position means that the path did not fit and
	   * must be truncated and terminated by hand.
	   */

	  strncpy(dest, src, bytesleft);
	  if (dest[bytesleft - 1] != '\0')
	    {
	      dest[bytesleft - 1] = '\0';
	      ret = -E2BIG;
	    }

	  return ret;
	}