blob: e41b108894da0ea0c4bdb86aaa20420ac2d2ac28 [file] [log] [blame]
#!/usr/bin/env python
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# $Id: $
# $Change: $
# $DateTime: $
# $Author: $
"""
Date and time input conversion
Module contents:
---- common
class DatetimeValueError - subclass of ValueError
---- datetime or date
datetime_syntax_msg - a string briefly describing the input format
str_to_datetime() - Interprets a string as a date and (optional) time.
scan_datetime() - Consumes a date and time from a string.
---- timedelta
signed_duration_syntax_msg - a string briefly describing the input format
unsigned_duration_syntax_msg - a string briefly describing the input format
str_to_duration() - Interprets a string as a duration or length of time.
scan_duration() - Consumes a duration from a string.
Examples:
from lib.datetimeutils import str_to_datetime
import datetime
dt = str_to_datetime('2008-07-13 14:15:16.1')
dt = str_to_datetime('20080713 141516.123456')
dt = str_to_datetime(' 20080713T1415 ')
d = str_to_datetime('2008-7-4')
from lib.datetimeutils import str_to_duration
import timedelta
td = str_to_duration('1') # one hour
td = str_to_duration(' 1:2:3 ') # one hour, two minutes and three seconds
td = str_to_duration(':2:3.4') # two minutes and 3.4 seconds
td = str_to_duration('-72') # negative 72 hours (normalized to -3 days)
The datetime syntax is based on ISO 8601 and RFC 3339. See...
http://www.rfc.net/rfc3339.html
http://hydracen.com/dx/iso8601.htm
http://www.cl.cam.ac.uk/~mgk25/iso-time.html
In the docstrings, the examples preceded by '>>>' can be executed and verified
automatically by the 'doctest' package of the Python standard library.
"""
from datetime import date, datetime, timedelta
import re
#--------------------------------- common ---------------------------------
class DatetimeValueError (ValueError):
"""
Error on conversion from string to date, datetime, or timedelta.
DatetimeValueError fields:
description
pos
endpos
badness
"""
def __init__(self, description, string, pos=None, endpos=None):
# Save enough info in the exception object so that a caller's
# exception handler can create a differently formatted message
# in case they don't like the format we provide.
global _spacepat
if endpos is None:
endpos = len(string or '')
if pos and string:
# avoid copying all of string[pos:endpos], in case it is big
p = _spacepat.match(string, pos, endpos).end()
if endpos > p+30:
badness = string[p:p+27].rstrip(' \t') + '...'
else:
badness = string[p:endpos].rstrip(' \t')
else:
badness = string
self.description, self.pos, self.endpos, self.badness = description, pos, endpos, badness
if badness:
description = '"%s" ... %s' % (badness, description)
ValueError.__init__(self, description)
#-------------------------------- datetime --------------------------------
datetime_syntax_msg = ('Specify date and time as "YYYY-MM-DD[ HH:MM[:SS[.S]]" '
'or "YYYYMMDD[ HHMM[SS[.S]]]". If both date and time '
'are given, a space or letter "T" must separate them.')
def str_to_datetime(string, pos=0, endpos=None):
"""
Interprets string[pos:endpos] as a date and (optional) time.
Returns a datetime.datetime object if string[pos:endpos] contains a valid
date and time. Returns a datetime.date object for a date with no time.
Raises DatetimeValueError if string does not contain a valid date, or
contains an invalid time or anything else other than whitespace.
Examples...
# delimited format
>>> str_to_datetime('2008-7-13')
datetime.date(2008, 7, 13)
>>> str_to_datetime('2008-7-13 2:15')
datetime.datetime(2008, 7, 13, 2, 15)
>>> str_to_datetime(' 2008-07-13 14:15:16.123456789 ')
datetime.datetime(2008, 7, 13, 14, 15, 16, 123456)
# numeric format
>>> str_to_datetime(' 20080713 ')
datetime.date(2008, 7, 13)
>>> str_to_datetime('20080713 141516.123')
datetime.datetime(2008, 7, 13, 14, 15, 16, 123000)
# slicing
>>> str_to_datetime('9200807139', 1, 9)
datetime.date(2008, 7, 13)
# 'T' can separate date and time
>>> str_to_datetime('2008-7-3t2:15')
datetime.datetime(2008, 7, 3, 2, 15)
>>> str_to_datetime(' 20080713T1415 ')
datetime.datetime(2008, 7, 13, 14, 15)
Errors... (for more, see scan_datetime() below)
>>> str_to_datetime('')
Traceback (most recent call last):
DatetimeValueError: Specify date and time as "YYYY-MM-DD[ HH:MM[:SS[.S]]" or "YYYYMMDD[ HHMM[SS[.S]]]". If both date and time are given, a space or letter "T" must separate them.
>>> str_to_datetime('nogood')
Traceback (most recent call last):
DatetimeValueError: "nogood" ... Specify date and time as "YYYY-MM-DD[ HH:MM[:SS[.S]]" or "YYYYMMDD[ HHMM[SS[.S]]]". If both date and time are given, a space or letter "T" must separate them.
>>> str_to_datetime(' 20080713 T1415 ') # can't have spaces around 'T'
Traceback (most recent call last):
DatetimeValueError: " 20080713 T1415 " ... date is followed by unrecognized "T1415"
>>> str_to_datetime('2008-7-13 2:15 &')
Traceback (most recent call last):
DatetimeValueError: "2008-7-13 2:15 &" ... date and time are followed by unrecognized "&"
>>> str_to_datetime('2008-7-13 abcdefghijklmnop')
Traceback (most recent call last):
DatetimeValueError: "2008-7-13 abcdefghijklmnop" ... date is followed by unrecognized "abcdefghi"
"""
global datetime_syntax_msg
if endpos is None:
endpos = len(string)
value, nextpos = scan_datetime(string, pos, endpos)
if value is None:
# string is empty or doesn't conform to the date syntax we require
raise DatetimeValueError(datetime_syntax_msg, string, pos, endpos)
elif nextpos < endpos and not string[nextpos:endpos].isspace():
# got valid date or datetime, but there is something more after it
if hasattr(value, 'hour'):
msg = 'date and time are followed by unrecognized "%s"'
else:
msg = 'date is followed by unrecognized "%s"'
msg %= string[nextpos:min(nextpos+10,endpos)].strip()
raise DatetimeValueError(msg, string, pos, endpos)
return value
def scan_datetime(string, pos=0, endpos=None):
"""
Consumes an initial substring of the slice string[pos:endpos] which
represents a date and (optional) time. Leading whitespace is ignored.
Raises DatetimeValueError if the text syntactically resembles a date or
datetime but fails semantic checks (e.g. field out of range, such as the
day in '2008-02-30').
Otherwise returns a tuple (value, nextpos) in which 'value' is either a
datetime.datetime object, a datetime.date object, or None; and 'nextpos'
is the index of the next character of string (pos <= nextpos <= endpos).
(None, pos) is returned if the beginning of the string (or slice) does
not conform to either the delimited date format YYYY-[M]M-[D]D or the
numeric date format YYYYMMDD.
(dt, nextpos) in which dt is a datetime.datetime object, is returned if
a valid date and time are found. At least one space or tab, or else the
single letter 'T' or 't' with no whitespace, must separate date and time.
Time must be delimited using colons [H]H:MM:[SS[.fraction]] if the
date is delimited, or must be numeric HHMM[SS[.fraction]] if the date is
numeric. The fraction may be any number of digits (at most 6 are kept).
(d, nextpos) in which d is a datetime.date object, is returned if a
valid date is found but the subsequent characters do not syntactically
resemble a time in the required format.
Examples... (for other syntax examples, see str_to_datetime() above)
>>> scan_datetime('2008-6-30')
(datetime.date(2008, 6, 30), 9)
>>> scan_datetime(' 2008-07-13 14:15:16 ')
(datetime.datetime(2008, 7, 13, 14, 15, 16), 20)
>>> scan_datetime(' nogood ')
(None, 0)
Errors...
>>> scan_datetime('2008-06-31')
Traceback (most recent call last):
DatetimeValueError: "2008-06-31" ... day is out of range for month
>>> scan_datetime('2008-07-13 24:00')
Traceback (most recent call last):
DatetimeValueError: "2008-07-13 24:00" ... hour must be in 0..23
>>> scan_datetime('2008-07-13 14:15:1 ')
Traceback (most recent call last):
DatetimeValueError: "2008-07-13 14:15:1 " ... second should have 2 digits
>>> scan_datetime('2008-07-13 14:5:16 ')
Traceback (most recent call last):
DatetimeValueError: "2008-07-13 14:5:16 " ... minute should have 2 digits
>>> scan_datetime('20080713 14151 ')
Traceback (most recent call last):
DatetimeValueError: "20080713 14151 " ... time should have 4 or 6 digits (HHMM or HHMMSS)
>>> scan_datetime('20080713 1.234')
Traceback (most recent call last):
DatetimeValueError: "20080713 1.234" ... time should have 6 digits before decimal point (HHMMSS.sss)
"""
global _datepat, _colontimepat, _numerictimepat
if endpos is None:
endpos = len(string)
datematch = _datepat.match(string, pos, endpos)
if datematch is None:
return None, pos
try:
nextpos = datematch.end()
yyyy, m, d, mm, dd = datematch.groups()
if m:
# delimited format
if len(d) <> 2 and len(d) <> 1:
raise ValueError, 'day should have 1 or 2 digits'
year, month, day = int(yyyy), int(m), int(d)
timepat = _colontimepat
else:
# numeric format
if len(dd) > 2:
raise ValueError, 'date should have 8 digits (YYYYMMDD)'
year, month, day = int(yyyy), int(mm), int(dd)
timepat = _numerictimepat
timematch = timepat.match(string, nextpos, endpos)
if timematch is None:
return date(year, month, day), nextpos
nextpos = timematch.end()
if m:
# delimited format
hh, mm, ss, frac = timematch.groups()
if len(hh) > 2:
raise ValueError, 'hour should have 1 or 2 digits'
if len(mm) <> 2:
raise ValueError, 'minute should have 2 digits'
if ss is not None and len(ss) <> 2:
raise ValueError, 'second should have 2 digits'
else:
# numeric format
hhmmss, frac = timematch.groups()
if len(hhmmss) == 6:
hh, mm, ss = hhmmss[:2], hhmmss[2:4], hhmmss[4:]
elif frac:
raise ValueError, 'time should have 6 digits before decimal point (HHMMSS.sss)'
elif len(hhmmss) == 4:
hh, mm, ss = hhmmss[:2], hhmmss[2:], None
else:
raise ValueError, 'time should have 4 or 6 digits (HHMM or HHMMSS)'
if frac:
microsecond = int((frac + '000000')[1:7])
dt = datetime(year, month, day, int(hh), int(mm), int(ss), microsecond)
elif ss:
dt = datetime(year, month, day, int(hh), int(mm), int(ss))
else:
dt = datetime(year, month, day, int(hh), int(mm))
return dt, nextpos
except ValueError, e:
# Nonsensical date or time (e.g. field out of range, such as month > 12)
raise DatetimeValueError(str(e), string, pos, nextpos)
#------------------------------- timedelta -------------------------------
signed_duration_syntax_msg = 'Specify duration as [hours][:minutes[:seconds[.fraction]]]'
unsigned_duration_syntax_msg = 'Specify duration as [+|-][hours][:minutes[:seconds[.fraction]]]'
def str_to_duration(string, pos=0, endpos=None, signed=True):
"""
Interprets string[pos:endpos] as a duration or length of time in the form:
[+|-][hours][:minutes[:seconds[.fraction]]]
If string[pos:endpos] contains a valid duration, it is converted to a
datetime.timedelta object, which is returned.
Raises DatetimeValueError if string[pos:endpos] does not contain a
valid duration; or if the duration includes a '+' or '-' sign and the
caller specifies signed=False; or if the duration is preceded or
followed by anything but whitespace.
Examples...
>>> str_to_duration('48')
datetime.timedelta(2)
>>> str_to_duration(' :120 ')
datetime.timedelta(0, 7200)
>>> str_to_duration(':0:72')
datetime.timedelta(0, 72)
>>> str_to_duration(':1:30')
datetime.timedelta(0, 90)
>>> str_to_duration('1:2:3.123456789')
datetime.timedelta(0, 3723, 123456)
# slicing
>>> str_to_duration('9249', 1, 3)
datetime.timedelta(1)
# duration can be negative if signed=True
>>> str_to_duration('-1')
datetime.timedelta(-1, 82800)
>>> str_to_duration('-:1')
datetime.timedelta(-1, 86340)
Errors... (for more, see scan_duration() below)
>>> str_to_duration('')
Traceback (most recent call last):
DatetimeValueError: Specify duration as [hours][:minutes[:seconds[.fraction]]]
>>> str_to_duration(':')
Traceback (most recent call last):
DatetimeValueError: ":" ... Specify duration as [hours][:minutes[:seconds[.fraction]]]
>>> str_to_duration('1:2: 3')
Traceback (most recent call last):
DatetimeValueError: "1:2: 3" ... duration is followed by unrecognized ": 3"
"""
global duration_syntax_msg
if endpos is None:
endpos = len(string)
value, nextpos = scan_duration(string, pos, endpos, signed)
if value is None:
# string is empty or doesn't conform to the syntax we require
if signed:
raise DatetimeValueError(signed_duration_syntax_msg, string, pos, endpos)
else:
raise DatetimeValueError(unsigned_duration_syntax_msg, string, pos, endpos)
elif nextpos < endpos and not string[nextpos:endpos].isspace():
# got valid duration, but there is something more after it
msg = ('duration is followed by unrecognized "%s"'
% string[nextpos:min(nextpos+10,endpos)].strip())
raise DatetimeValueError(msg, string, pos, endpos)
return value
def scan_duration(string, pos=0, endpos=None, signed=True):
"""
Consumes an initial substring of the slice string[pos:endpos]
representing a duration of time. Leading whitespace is ignored.
If 'signed' is True, a '+' or '-' sign may precede the duration.
Note that a negative duration stored in a datetime.timedelta object
is normalized so that only the 'days' field is negative; for example,
'-:0:0.000001' is represented as datetime.timedelta(days=-1,
seconds=86399, microseconds=999999).
(None, pos) is returned if the input does not conform to the
[+|-][hours][:minutes[:seconds[.fraction]]] format.
If a valid duration is found, a tuple (td, nextpos) is returned,
where 'td' is a datetime.timedelta object, and 'nextpos' is the
index of the next character of the string (pos <= nextpos <= endpos).
Raises DatetimeValueError if the text syntactically resembles a
duration but fails semantic checks (e.g. field out of range, such
as minutes > 59 in '100:60:00'); or if 'signed' is False and a
'+' or '-' sign is found.
Examples... (for other syntax examples, see str_to_duration() above)
>>> scan_duration('1:2:3.4')
(datetime.timedelta(0, 3723, 400000), 7)
>>> scan_duration('')
(None, 0)
>>> scan_duration('bad')
(None, 0)
Errors...
>>> scan_duration('100:200:300')
Traceback (most recent call last):
DatetimeValueError: "100:200:300" ... minutes should be in 0..59
>>> scan_duration('-:1', signed=False)
Traceback (most recent call last):
DatetimeValueError: "-:1" ... duration should be unsigned
"""
global _durationpat
if endpos is None:
endpos = len(string)
match = _durationpat.match(string, pos, endpos)
sign, h, m, s, f = match.groups()
if not h and not m:
return None, pos
try:
hours = minutes = seconds = microseconds = 0
if h:
hours = int(h)
if m:
minutes = int(m)
if s:
seconds = int(s)
if f and len(f) > 1:
microseconds = int((f + '000000')[1:7])
if hours > 0 and minutes > 59:
raise ValueError('minutes should be in 0..59')
minutes += hours * 60
if minutes > 0 and seconds > 59:
raise ValueError('seconds should be in 0..59')
seconds += minutes * 60
td = timedelta(seconds=seconds, microseconds=microseconds)
if sign:
if not signed:
raise ValueError('duration should be unsigned')
if sign == '-':
td = -td
return td, match.end()
except OverflowError, e:
# Nonsensical duration (e.g. field out of range)
raise DatetimeValueError(str(e), string, pos, match.end())
except ValueError, e:
# Nonsensical duration (e.g. field out of range)
raise DatetimeValueError(str(e), string, pos, match.end())
#-------------------------------- private --------------------------------
_datepat = (r'\s*' # skip any leading whitespace
r'(\d\d\d\d)' # yyyy \1
r'(?:' # followed by either
r'(?:-(\d\d?)-(\d*))|' # -m[m]-d[d] or \2 \3
r'(?:(\d\d)(\d\d+))' # mmdd \4 \5
r')') # note dd absorbs excess digits
_colontimepat = (r'(?:\s+|[Tt])' # whitespace or 'T'
r'(\d+)' # [h]h \1
r':(\d+)' # :mm \2
r'(?::(\d*)(\.\d*)?)?') # [:ss[.frac]] \3 \4
# hh, mm, ss absorb excess digits
_numerictimepat = (r'(?:\s+|[Tt])' # whitespace or 'T'
r'(\d+)' # hhmmss \1
r'(\.\d*)?') # [.frac] \2
# hhmmss absorbs excess digits
_durationpat = (r'\s*' # skip any leading whitespace
r'([+-])?' # [+|-] \1
r'(\d*)' # [hours] \2
r'(?:' # [
r':(\d+)' # :minutes \3
r'(?::(\d+)(\.\d*)?)?' # [:seconds[.frac]] \4 \5
r')?') # ]
_spacepat = r'\s*' # whitespace
_datepat = re.compile(_datepat)
_colontimepat = re.compile(_colontimepat)
_numerictimepat = re.compile(_numerictimepat)
_durationpat = re.compile(_durationpat)
_spacepat = re.compile(_spacepat)
# If invoked as a script, execute the examples in each function's docstring
# and verify the expected output. Produces no output if verification is
# successful, unless -v is specified. Use -v option for verbose test output.
def _test():
import doctest
doctest.testmod()
if __name__ == "__main__":
_test()