netutils/netlib/netlib_parseurl.c - nuttx-apps - Git at Google

 /****************************************************************************
  * apps/netutils/netlib/netlib_parseurl.c
  *
  * SPDX-License-Identifier: Apache-2.0
  *
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.  The
  * ASF licenses this file to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance with the
  * License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
  * License for the specific language governing permissions and limitations
  * under the License.
  *
  ****************************************************************************/

 /****************************************************************************
  * Included Files
  ****************************************************************************/

 #include <nuttx/config.h>

 #include <stdint.h>
 #include <string.h>
 #include <errno.h>

 #include "netutils/netlib.h"

 /****************************************************************************
  * Public Functions
  ****************************************************************************/

 /****************************************************************************
  * Name: netlib_parseurl
  *
  * Description:
  *   Parse a URL, not only HTTP ones. The parsing is according to this rule:
  *   SCHEME :// HOST [: PORT] / PATH
  *   - scheme is everything before the first colon
  *   - scheme must be followed by ://
  *   - host is everything until colon, slash or space
  *   - port is optional, parsed only if host ends with colon
  *   - path is everything after the host.
  *   This is noticeably simpler than the official URL parsing method, since
  *   - it does not take into account the user:pass@ part that can be present
  *     before the host. Support of these fields is planned in the url_s
  *     structure, but it is not parsed yet.
  *   - it does not separate the URL parameters nor the bookmark
  *   Note: see here for the documentation of a complete URL parsing routine:
  *   https://www.php.net/manual/fr/function.parse-url.php
  *
  * Input Parameters:
  *   str - The NUL-terminated URL string to parse.
  *   url - On input, scheme/host/path point to the caller's output buffers
  *         and schemelen/hostlen/pathlen give their sizes in bytes. On
  *         output, the buffers hold the NUL-terminated components. The port
  *         field is written only when a ':' follows the host (a ':' with
  *         no digits yields port 0); otherwise it is left untouched.
  *
  * Returned Value:
  *   OK      - The URL was parsed and every component fit in its buffer.
  *   -EINVAL - A NULL argument, a zero-length buffer, a missing "://"
  *             separator, or a port number larger than 65535.
  *   -E2BIG  - At least one component did not fit. An oversized scheme or
  *             host is truncated; an oversized path is reduced to "/" (or
  *             to the empty string when the path buffer cannot even hold
  *             "/" plus the terminating NUL).
  *
  ****************************************************************************/

 int netlib_parseurl(FAR const char *str, FAR struct url_s *url)
 {
   FAR const char *src = str;
   FAR char *dest;
   size_t bytesleft;
   size_t pathlen;
   int ret = OK;

   if (str == NULL || url == NULL)
     {
       return -EINVAL;
     }

   /* Each field should have at least 1 byte to store
    * the terminating NUL.
    */

   if (url->schemelen == 0 || url->hostlen == 0 || url->pathlen == 0)
     {
       return -EINVAL;
     }

   /* Extract the scheme field: everything up to the first colon */

   dest      = url->scheme;
   bytesleft = url->schemelen;

   while (*src != '\0' && *src != ':')
     {
       /* Make sure that there is space for another character in the
        * scheme (reserving space for the null terminator).
        */

       if (bytesleft > 1)
         {
           /* Copy the byte */

           *dest++ = *src++;
           bytesleft--;
         }
       else
         {
           /* Note the error, but continue parsing until the end of the
            * scheme so that the remaining fields can still be extracted.
            */

           src++;
           ret = -E2BIG;
         }
     }

   *dest = '\0';

   /* Parse and skip the scheme separator "://".  The checks are done one
    * character at a time so that we never read past the terminating NUL.
    */

   if (*src != ':')
     {
       return -EINVAL;
     }

   src++;

   if (*src != '/')
     {
       return -EINVAL;
     }

   src++;

   if (*src != '/')
     {
       return -EINVAL;
     }

   src++;

   /* Copy the hostname following "://" and up to the terminator */

   dest      = url->host;
   bytesleft = url->hostlen;

   while (*src != '\0' && *src != '/' && *src != ' ' && *src != ':')
     {
       /* Make sure that there is space for another character in the
        * hostname (reserving space for the null terminator).
        */

       if (bytesleft > 1)
         {
           /* Copy the byte */

           *dest++ = *src++;
           bytesleft--;
         }
       else
         {
           /* Note the error, but continue parsing until the end of the
            * hostname
            */

           src++;
           ret = -E2BIG;
         }
     }

   *dest = '\0';

   /* Check if the hostname is followed by a port number */

   if (*src == ':')
     {
       /* Accumulate in a wider type so that an out-of-range port can be
        * detected instead of silently wrapping around 16 bits.
        */

       uint32_t accum = 0;

       src++; /* Skip over the colon */

       while (*src >= '0' && *src <= '9')
         {
           /* Once the value is out of range, stop accumulating (so the
            * 32-bit accumulator cannot overflow either) but keep
            * consuming the remaining digits.
            */

           if (accum <= UINT16_MAX)
             {
               accum = 10 * accum + (uint32_t)(*src - '0');
             }

           src++;
         }

       if (accum > UINT16_MAX)
         {
           return -EINVAL;
         }

       url->port = (uint16_t)accum;
     }

   /* Make sure the file name starts with exactly one '/' */

   dest      = url->path;
   bytesleft = url->pathlen;

   while (*src == '/')
     {
       src++;
     }

   /* The path always needs room for the leading '/' and the NUL.  With a
    * smaller buffer, writing both would overrun it by one byte.
    */

   if (bytesleft < 2)
     {
       *dest = '\0';
       return -E2BIG;
     }

   /* Note: the current implementation does not distinguish
    * an empty path and "/". While it's fine for HTTP, maybe it's
    * cleaner to move the HTTP-specific normalization to the caller.
    */

   *dest++ = '/';
   bytesleft--;

   /* Then copy the rest of the file name to the user buffer */

   pathlen = strlen(src);
   if (pathlen < bytesleft)
     {
       memcpy(dest, src, pathlen);
       dest[pathlen] = '\0';
     }
   else
     {
       /* Path does not fit: leave just "/" in the buffer */

       dest[0] = '\0';
       ret = -E2BIG;
     }

   return ret;
 }
	/****************************************************************************
	* apps/netutils/netlib/netlib_parseurl.c
	*
	* SPDX-License-Identifier: Apache-2.0
	*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership. The
	* ASF licenses this file to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance with the
	* License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
	* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
	* License for the specific language governing permissions and limitations
	* under the License.
	*
	****************************************************************************/

	/****************************************************************************
	* Included Files
	****************************************************************************/

	#include <nuttx/config.h>

	#include <stdint.h>
	#include <string.h>
	#include <errno.h>

	#include "netutils/netlib.h"

	/****************************************************************************
	* Public Functions
	****************************************************************************/

	/****************************************************************************
	 * Name: netlib_parseurl
	 *
	 * Description:
	 *   Parse a URL, not only HTTP ones. The parsing is according to this rule:
	 *   SCHEME :// HOST [: PORT] / PATH
	 *   - scheme is everything before the first colon
	 *   - scheme must be followed by ://
	 *   - host is everything until colon, slash or space
	 *   - port is optional, parsed only if host ends with colon
	 *   - path is everything after the host.
	 *   This is noticeably simpler than the official URL parsing method, since
	 *   - it does not take into account the user:pass@ part that can be present
	 *     before the host. Support of these fields is planned in the url_s
	 *     structure, but it is not parsed yet.
	 *   - it does not separate the URL parameters nor the bookmark
	 *   Note: see here for the documentation of a complete URL parsing routine:
	 *   https://www.php.net/manual/fr/function.parse-url.php
	 *
	 * Input Parameters:
	 *   str - The NUL-terminated URL string to parse.
	 *   url - On input, scheme/host/path point to the caller's output buffers
	 *         and schemelen/hostlen/pathlen give their sizes in bytes. On
	 *         output, the buffers hold the NUL-terminated components. The port
	 *         field is written only when a ':' follows the host (a ':' with
	 *         no digits yields port 0); otherwise it is left untouched.
	 *
	 * Returned Value:
	 *   OK      - The URL was parsed and every component fit in its buffer.
	 *   -EINVAL - A NULL argument, a zero-length buffer, a missing "://"
	 *             separator, or a port number larger than 65535.
	 *   -E2BIG  - At least one component did not fit. An oversized scheme or
	 *             host is truncated; an oversized path is reduced to "/" (or
	 *             to the empty string when the path buffer cannot even hold
	 *             "/" plus the terminating NUL).
	 *
	 ****************************************************************************/

	int netlib_parseurl(FAR const char *str, FAR struct url_s *url)
	{
	  FAR const char *src = str;
	  FAR char *dest;
	  size_t bytesleft;
	  size_t pathlen;
	  int ret = OK;

	  if (str == NULL || url == NULL)
	    {
	      return -EINVAL;
	    }

	  /* Each field should have at least 1 byte to store
	   * the terminating NUL.
	   */

	  if (url->schemelen == 0 || url->hostlen == 0 || url->pathlen == 0)
	    {
	      return -EINVAL;
	    }

	  /* Extract the scheme field: everything up to the first colon */

	  dest      = url->scheme;
	  bytesleft = url->schemelen;

	  while (*src != '\0' && *src != ':')
	    {
	      /* Make sure that there is space for another character in the
	       * scheme (reserving space for the null terminator).
	       */

	      if (bytesleft > 1)
	        {
	          /* Copy the byte */

	          *dest++ = *src++;
	          bytesleft--;
	        }
	      else
	        {
	          /* Note the error, but continue parsing until the end of the
	           * scheme so that the remaining fields can still be extracted.
	           */

	          src++;
	          ret = -E2BIG;
	        }
	    }

	  *dest = '\0';

	  /* Parse and skip the scheme separator "://".  The checks are done one
	   * character at a time so that we never read past the terminating NUL.
	   */

	  if (*src != ':')
	    {
	      return -EINVAL;
	    }

	  src++;

	  if (*src != '/')
	    {
	      return -EINVAL;
	    }

	  src++;

	  if (*src != '/')
	    {
	      return -EINVAL;
	    }

	  src++;

	  /* Copy the hostname following "://" and up to the terminator */

	  dest      = url->host;
	  bytesleft = url->hostlen;

	  while (*src != '\0' && *src != '/' && *src != ' ' && *src != ':')
	    {
	      /* Make sure that there is space for another character in the
	       * hostname (reserving space for the null terminator).
	       */

	      if (bytesleft > 1)
	        {
	          /* Copy the byte */

	          *dest++ = *src++;
	          bytesleft--;
	        }
	      else
	        {
	          /* Note the error, but continue parsing until the end of the
	           * hostname
	           */

	          src++;
	          ret = -E2BIG;
	        }
	    }

	  *dest = '\0';

	  /* Check if the hostname is followed by a port number */

	  if (*src == ':')
	    {
	      /* Accumulate in a wider type so that an out-of-range port can be
	       * detected instead of silently wrapping around 16 bits.
	       */

	      uint32_t accum = 0;

	      src++; /* Skip over the colon */

	      while (*src >= '0' && *src <= '9')
	        {
	          /* Once the value is out of range, stop accumulating (so the
	           * 32-bit accumulator cannot overflow either) but keep
	           * consuming the remaining digits.
	           */

	          if (accum <= UINT16_MAX)
	            {
	              accum = 10 * accum + (uint32_t)(*src - '0');
	            }

	          src++;
	        }

	      if (accum > UINT16_MAX)
	        {
	          return -EINVAL;
	        }

	      url->port = (uint16_t)accum;
	    }

	  /* Make sure the file name starts with exactly one '/' */

	  dest      = url->path;
	  bytesleft = url->pathlen;

	  while (*src == '/')
	    {
	      src++;
	    }

	  /* The path always needs room for the leading '/' and the NUL.  With a
	   * smaller buffer, writing both would overrun it by one byte.
	   */

	  if (bytesleft < 2)
	    {
	      *dest = '\0';
	      return -E2BIG;
	    }

	  /* Note: the current implementation does not distinguish
	   * an empty path and "/". While it's fine for HTTP, maybe it's
	   * cleaner to move the HTTP-specific normalization to the caller.
	   */

	  *dest++ = '/';
	  bytesleft--;

	  /* Then copy the rest of the file name to the user buffer */

	  pathlen = strlen(src);
	  if (pathlen < bytesleft)
	    {
	      memcpy(dest, src, pathlen);
	      dest[pathlen] = '\0';
	    }
	  else
	    {
	      /* Path does not fit: leave just "/" in the buffer */

	      dest[0] = '\0';
	      ret = -E2BIG;
	    }

	  return ret;
	}