AK: Add UnixDateTime::parse() method

Copy parse() method from LibCore::DateTime::parse(). Augment the method
to handle parsing from GMT time. Fix incorrect handling of year in '%D'
format specifier. Remove all format specifiers related to time zones.
Copy relevant tests and add additional ones.
This commit is contained in:
Tomasz Strejczek 2025-06-21 17:18:45 +02:00 committed by Jelle Raaijmakers
parent 820fee434e
commit ea32e39d68
3 changed files with 395 additions and 0 deletions

View File

@ -7,6 +7,7 @@
#include <AK/Checked.h> #include <AK/Checked.h>
#include <AK/DateConstants.h> #include <AK/DateConstants.h>
#include <AK/GenericLexer.h>
#include <AK/String.h> #include <AK/String.h>
#include <AK/StringBuilder.h> #include <AK/StringBuilder.h>
#include <AK/Time.h> #include <AK/Time.h>
@ -17,6 +18,7 @@
# define localtime_r(time, tm) localtime_s(tm, time) # define localtime_r(time, tm) localtime_s(tm, time)
# define gmtime_r(time, tm) gmtime_s(tm, time) # define gmtime_r(time, tm) gmtime_s(tm, time)
# define tzname _tzname # define tzname _tzname
# define timegm _mkgmtime
#endif #endif
namespace AK { namespace AK {
@ -509,4 +511,261 @@ ByteString UnixDateTime::to_byte_string(StringView format, LocalTime local_time)
return builder.to_byte_string(); return builder.to_byte_string();
} }
// Parses `string` according to the strptime-like `format` and returns the resulting timestamp.
//
// Literal characters in `format` must match the input exactly. Supported specifiers:
//   %a %A      short / long weekday name (ASCII case-insensitive), stored in tm_wday
//   %b %h %B   short / long month name (ASCII case-insensitive)
//   %C         century, combined with the two low digits of the year parsed so far
//   %d %e      day of month
//   %D         month/day/year
//   %H %I      hour (24-hour / 12-hour clock)
//   %j         day of year (normalized into month and day via mktime())
//   %m %M %S   month, minute, second
//   %n %t      any run of ASCII whitespace (possibly empty)
//   %p %r      "AM" or "PM" (case-sensitive)
//   %R %T      hour:minute / hour:minute:second
//   %w         weekday number, stored in tm_wday
//   %y %Y      two-digit year / full year
//   %x         hours[:minutes] (or hhmm) subtracted from the parsed hour and minute
//   %X         '.' followed by a number that is discarded
//   %+         a non-empty run of characters up to the next literal format character
//   %%         a literal '%'
// Any other specifier fails the parse.
//
// Returns an empty Optional if a field cannot be parsed, if a literal does not match, or if the
// format and the input are not both fully consumed.
//
// If `from_gmt` is true the parsed fields are converted with timegm(); otherwise they are passed
// unchanged to from_unix_time_parts().
Optional<UnixDateTime> UnixDateTime::parse(StringView format, StringView string, bool from_gmt)
{
    unsigned format_pos = 0;

    struct tm tm = {};
    tm.tm_isdst = -1;

    auto parsing_failed = false;
    GenericLexer string_lexer(string);

    // Consumes a decimal integer; flags the parse as failed (and yields 0) when there is none.
    auto parse_number = [&] {
        auto result = string_lexer.consume_decimal_integer<int>();
        if (result.is_error()) {
            parsing_failed = true;
            return 0;
        }
        return result.value();
    };

    // Consumes exactly the character `c`; flags the parse as failed when the input differs.
    auto consume = [&](char c) {
        if (!string_lexer.consume_specific(c))
            parsing_failed = true;
    };

    // Consumes `name` if the upcoming input equals it, ignoring ASCII case.
    auto consume_specific_ascii_case_insensitive = [&](StringView name) {
        auto next_string = string_lexer.peek_string(name.length());
        if (next_string.has_value() && next_string->equals_ignoring_ascii_case(name)) {
            string_lexer.consume(name.length());
            return true;
        }
        return false;
    };

    // Tries every entry of `names` in order and returns the index of the first one that could be
    // consumed from the input, or an empty Optional when none matches.
    auto consume_name_from = [&](auto const& names) -> Optional<int> {
        int index = 0;
        for (auto name : names) {
            if (consume_specific_ascii_case_insensitive(name))
                return index;
            ++index;
        }
        return {};
    };

    // Maps a two-digit year onto tm_year, which counts years since 1900:
    // 70..99 -> 1970..1999, everything else -> 2000 + year.
    // NOTE(review): POSIX strptime() maps 69 to 1969, this pivot maps it to 2069 - confirm intent.
    auto two_digit_year_to_tm_year = [](int year) {
        return (year > 69 && year <= 99) ? year : 100 + year;
    };

    while (format_pos < format.length() && !string_lexer.is_eof()) {
        if (format[format_pos] != '%') {
            consume(format[format_pos]);
            format_pos++;
            continue;
        }

        format_pos++;
        // A trailing lone '%' is not a valid format.
        if (format_pos == format.length())
            return {};

        switch (format[format_pos]) {
        case 'a': {
            auto wday = consume_name_from(short_day_names);
            if (!wday.has_value())
                return {};
            tm.tm_wday = wday.value();
            break;
        }
        case 'A': {
            auto wday = consume_name_from(long_day_names);
            if (!wday.has_value())
                return {};
            tm.tm_wday = wday.value();
            break;
        }
        case 'h':
        case 'b': {
            auto mon = consume_name_from(short_month_names);
            if (!mon.has_value())
                return {};
            tm.tm_mon = mon.value();
            break;
        }
        case 'B': {
            auto mon = consume_name_from(long_month_names);
            if (!mon.has_value())
                return {};
            tm.tm_mon = mon.value();
            break;
        }
        case 'C': {
            int num = parse_number();
            tm.tm_year = (num - 19) * 100 + (tm.tm_year % 100);
            break;
        }
        case 'd':
            tm.tm_mday = parse_number();
            break;
        case 'D': {
            int mon = parse_number();
            consume('/');
            int day = parse_number();
            consume('/');
            int year = parse_number();
            tm.tm_mon = mon - 1;
            tm.tm_mday = day;
            // Accept both a full year and a two-digit year.
            tm.tm_year = year > 1900 ? year - 1900 : two_digit_year_to_tm_year(year);
            break;
        }
        case 'e':
            tm.tm_mday = parse_number();
            break;
        case 'H':
            tm.tm_hour = parse_number();
            break;
        case 'I': {
            int num = parse_number();
            // 12 wraps to 0 so that a following %p can add 12 for "PM".
            tm.tm_hour = num % 12;
            break;
        }
        case 'j':
            // a little trickery here... we can get mktime() to figure out mon and mday using out of range values.
            // yday is not used so setting it is pointless.
            tm.tm_mday = parse_number();
            tm.tm_mon = 0;
            (void)mktime(&tm);
            break;
        case 'm': {
            int num = parse_number();
            tm.tm_mon = num - 1;
            break;
        }
        case 'M':
            tm.tm_min = parse_number();
            break;
        case 'n':
        case 't':
            string_lexer.consume_while(is_ascii_space);
            break;
        case 'r':
        case 'p': {
            auto ampm = string_lexer.consume(2);
            if (ampm == "PM") {
                if (tm.tm_hour < 12)
                    tm.tm_hour += 12;
            } else if (ampm != "AM") {
                return {};
            }
            break;
        }
        case 'R':
            tm.tm_hour = parse_number();
            consume(':');
            tm.tm_min = parse_number();
            break;
        case 'S':
            tm.tm_sec = parse_number();
            break;
        case 'T':
            tm.tm_hour = parse_number();
            consume(':');
            tm.tm_min = parse_number();
            consume(':');
            tm.tm_sec = parse_number();
            break;
        case 'w':
            tm.tm_wday = parse_number();
            break;
        case 'y': {
            int year = parse_number();
            // tm_year is an offset from 1900, so a calendar year must not be stored here.
            tm.tm_year = two_digit_year_to_tm_year(year);
            break;
        }
        case 'Y': {
            int year = parse_number();
            tm.tm_year = year - 1900;
            break;
        }
        case 'x': {
            auto hours = parse_number();
            int minutes;
            if (string_lexer.consume_specific(':')) {
                minutes = parse_number();
            } else {
                // No separator: the number is of the form hhmm.
                minutes = hours % 100;
                hours = hours / 100;
            }
            tm.tm_hour -= hours;
            tm.tm_min -= minutes;
            break;
        }
        case 'X': {
            if (!string_lexer.consume_specific('.'))
                return {};
            auto discarded = parse_number();
            (void)discarded; // NOTE: the tm structure does not support sub second precision, so drop this value.
            break;
        }
        case '+': {
            Optional<char> next_format_character;
            if (format_pos + 1 < format.length()) {
                next_format_character = format[format_pos + 1];
                // Disallow another formatter directly after %+. This is to avoid ambiguity when parsing a string like
                // "ignoreJan" with "%+%b", as it would be non-trivial to know that where the %b field begins.
                if (next_format_character == '%')
                    return {};
            }

            auto discarded = string_lexer.consume_until([&](auto ch) { return ch == next_format_character; });
            if (discarded.is_empty())
                return {};
            break;
        }
        case '%':
            consume('%');
            break;
        default:
            parsing_failed = true;
            break;
        }

        if (parsing_failed)
            return {};

        format_pos++;
    }

    // Both the input and the format have to be used up completely.
    if (!string_lexer.is_eof() || format_pos != format.length())
        return {};

    if (from_gmt) {
        // When from_gmt is true, the parsed time is in GMT and needs to be converted to Unix time
        tm.tm_isdst = 0; // GMT doesn't have daylight saving time
        auto gmt_time = timegm(&tm);
        if (gmt_time == -1)
            return {};
        return UnixDateTime::from_seconds_since_epoch(gmt_time);
    }

    return UnixDateTime::from_unix_time_parts(
        tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
        tm.tm_hour, tm.tm_min, tm.tm_sec, 0);
}
} }

View File

@ -478,6 +478,9 @@ public:
ErrorOr<String> to_string(StringView format = "%Y-%m-%d %H:%M:%S"sv, LocalTime = LocalTime::Yes) const; ErrorOr<String> to_string(StringView format = "%Y-%m-%d %H:%M:%S"sv, LocalTime = LocalTime::Yes) const;
Utf16String to_utf16_string(StringView format = "%Y-%m-%d %H:%M:%S"sv, LocalTime = LocalTime::Yes) const; Utf16String to_utf16_string(StringView format = "%Y-%m-%d %H:%M:%S"sv, LocalTime = LocalTime::Yes) const;
ByteString to_byte_string(StringView format = "%Y-%m-%d %H:%M:%S"sv, LocalTime = LocalTime::Yes) const; ByteString to_byte_string(StringView format = "%Y-%m-%d %H:%M:%S"sv, LocalTime = LocalTime::Yes) const;
// Parses a string in the given format. Does not support time zone-related format specifiers.
// Returns an empty Optional if a field or literal does not match, if the format contains an
// unsupported specifier, or if the format and the string are not both fully consumed.
// If 'from_gmt' is true, the string is parsed as a GMT time, otherwise it is parsed as a local time.
// NOTE(review): without 'from_gmt' the parsed fields are handed to from_unix_time_parts() as-is;
// confirm that this matches the "local time" wording above.
static Optional<UnixDateTime> parse(StringView format, StringView string, bool from_gmt = false);
// Offsetting a UNIX time by a duration yields another UNIX time. // Offsetting a UNIX time by a duration yields another UNIX time.
constexpr UnixDateTime operator+(Duration const& other) const { return UnixDateTime { m_offset + other }; } constexpr UnixDateTime operator+(Duration const& other) const { return UnixDateTime { m_offset + other }; }

View File

@ -8,6 +8,11 @@
#include <AK/Time.h> #include <AK/Time.h>
#ifdef AK_OS_WINDOWS
# include <time.h>
# define gmtime_r(time, tm) gmtime_s(tm, time)
#endif
using AK::Duration; using AK::Duration;
#if (defined(__TIMESIZE) && __TIMESIZE < 64) || defined(AK_OS_WINDOWS) // NOTE: See AK/Time.h, on Windows we hardcode to long's for timeval #if (defined(__TIMESIZE) && __TIMESIZE < 64) || defined(AK_OS_WINDOWS) // NOTE: See AK/Time.h, on Windows we hardcode to long's for timeval
@ -757,3 +762,131 @@ TEST_CASE(time_to_string)
test("%t"sv, "\t"sv, 2023, 1, 1, 0, 0, 0); test("%t"sv, "\t"sv, 2023, 1, 1, 0, 0, 0);
test("%%"sv, "%"sv, 2023, 1, 1, 0, 0, 0); test("%%"sv, "%"sv, 2023, 1, 1, 0, 0, 0);
} }
TEST_CASE(parse_time)
{
    // Parses `input` with `format` (default from_gmt) and compares the gmtime_r() breakdown of
    // the resulting timestamp against the expected calendar fields.
    auto expect_parsed = [](auto format, auto input, int year, int month, int day, int hour, int minute, int second = 0) {
        auto parsed = AK::UnixDateTime::parse(format, input);
        VERIFY(parsed.has_value());

        auto spec = parsed.value().to_timespec();
        struct tm utc;
#ifdef AK_OS_WINDOWS
        // gmtime_r is mapped onto gmtime_s in this file, so success is checked against 0.
        VERIFY(gmtime_r(&spec.tv_sec, &utc) == 0);
#else
        VERIFY(gmtime_r(&spec.tv_sec, &utc) != nullptr);
#endif

        EXPECT_EQ(year, utc.tm_year + 1900);
        EXPECT_EQ(month, utc.tm_mon + 1);
        EXPECT_EQ(day, utc.tm_mday);
        EXPECT_EQ(hour, utc.tm_hour);
        EXPECT_EQ(minute, utc.tm_min);
        EXPECT_EQ(second, utc.tm_sec);
    };

    // Plain numeric dates and times, with and without literal text in the format.
    expect_parsed("%d-%m-%Y %H:%M"sv, "05-01-2023 00:00"sv, 2023, 1, 5, 0, 0);
    expect_parsed("%d-%m-%Y %H:%M GMT"sv, "05-01-2023 00:00 GMT"sv, 2023, 1, 5, 0, 0);
    expect_parsed("%Y/%m/%d %R"sv, "2023/01/23 10:50"sv, 2023, 1, 23, 10, 50);
    expect_parsed("%Y-%m-%d %H:%M"sv, "1999-12-31 23:59"sv, 1999, 12, 31, 23, 59);
    expect_parsed("%Y/%m/%d %R"sv, "1970/01/01 00:00"sv, 1970, 1, 1, 0, 0);
    expect_parsed("%Y/%m/%d %R"sv, "2000/02/29 12:34"sv, 2000, 2, 29, 12, 34); // Leap year

    // %T is hour:minute:second in one specifier.
    expect_parsed("%Y-%m-%d %T"sv, "2023-01-23 10:50:15"sv, 2023, 1, 23, 10, 50, 15);

    // %S parses the seconds on their own.
    expect_parsed("%Y-%m-%d %H:%M:%S"sv, "2023-01-23 10:50:42"sv, 2023, 1, 23, 10, 50, 42);

    // %I is the 12-hour clock and %p the AM/PM marker.
    expect_parsed("%Y-%m-%d %I:%M %p"sv, "2023-01-23 03:21 PM"sv, 2023, 1, 23, 15, 21, 0);
    expect_parsed("%Y-%m-%d %I:%M %p"sv, "2023-01-23 12:01 AM"sv, 2023, 1, 23, 0, 1, 0);

    // %D is month/day/year.
    expect_parsed("%D %H:%M:%S"sv, "01/23/23 10:50:59"sv, 2023, 1, 23, 10, 50, 59);

    // %y is the two-digit year, %C the century.
    expect_parsed("%y %C %m %d %H:%M:%S"sv, "23 20 01 23 10:50:11"sv, 2023, 1, 23, 10, 50, 11);

    // %a/%A accept the short/long weekday name; the expected date comes from the numeric fields.
    expect_parsed("%Y-%m-%d %a"sv, "2023-01-23 Mon"sv, 2023, 1, 23, 0, 0, 0);
    expect_parsed("%Y-%m-%d %A"sv, "2023-01-23 Monday"sv, 2023, 1, 23, 0, 0, 0);

    // %b/%B accept the short/long month name, which supplies the month here.
    expect_parsed("%d %b %Y"sv, "05 Jan 2023"sv, 2023, 1, 5, 0, 0, 0);
    expect_parsed("%d %B %Y"sv, "05 January 2023"sv, 2023, 1, 5, 0, 0, 0);

    // %j is the day of the year.
    expect_parsed("%Y %j %H:%M:%S"sv, "2023 023 10:50:12"sv, 2023, 1, 23, 10, 50, 12);

    // %w is the weekday number (0=Sunday).
    expect_parsed("%Y-%m-%d %w %H:%M:%S"sv, "2023-01-23 1 10:50:13"sv, 2023, 1, 23, 10, 50, 13);

    // %n and %t match whitespace, %% matches a literal '%'.
    expect_parsed("%Y-%m-%d%n%H:%M:%S"sv, "2023-01-23\n10:50:14"sv, 2023, 1, 23, 10, 50, 14);
    expect_parsed("%Y-%m-%d%t%H:%M:%S"sv, "2023-01-23\t10:50:15"sv, 2023, 1, 23, 10, 50, 15);
    expect_parsed("%Y-%m-%d %% %H:%M:%S"sv, "2023-01-23 % 10:50:16"sv, 2023, 1, 23, 10, 50, 16);
}
TEST_CASE(parse_wildcard_characters)
{
    // Inputs that must be rejected: %+ with nothing to consume, the unsupported %* specifier,
    // and %+ immediately followed by another specifier.
    EXPECT(!AK::UnixDateTime::parse("%+"sv, ""sv).has_value());
    EXPECT(!AK::UnixDateTime::parse("foo%+"sv, "foo"sv).has_value());
    EXPECT(!AK::UnixDateTime::parse("[%*]"sv, "[foo"sv).has_value());
    EXPECT(!AK::UnixDateTime::parse("[%*]"sv, "foo]"sv).has_value());
    EXPECT(!AK::UnixDateTime::parse("%+%b"sv, "fooJan"sv).has_value());

    // Parses `input` with `format` and checks the date part of the gmtime_r() breakdown.
    auto expect_date = [](auto format, auto input, int year, int month, int day) {
        auto parsed = AK::UnixDateTime::parse(format, input);
        VERIFY(parsed.has_value());

        auto spec = parsed.value().to_timespec();
        struct tm utc;
#ifdef AK_OS_WINDOWS
        VERIFY(gmtime_r(&spec.tv_sec, &utc) == 0);
#else
        VERIFY(gmtime_r(&spec.tv_sec, &utc) != nullptr);
#endif

        EXPECT_EQ(year, utc.tm_year + 1900);
        EXPECT_EQ(month, utc.tm_mon + 1);
        EXPECT_EQ(day, utc.tm_mday);
    };

    // %+ in the middle and at the end of the format, bare and wrapped in literal brackets.
    expect_date("%Y %+ %m %d"sv, "2023 whf 01 23"sv, 2023, 1, 23);
    expect_date("%Y %m %d %+"sv, "2023 01 23 whf"sv, 2023, 1, 23);
    expect_date("%Y [%+] %m %d"sv, "2023 [well hello friends!] 01 23"sv, 2023, 1, 23);
    expect_date("%Y %m %d [%+]"sv, "2023 01 23 [well hello friends!]"sv, 2023, 1, 23);
}
TEST_CASE(parse_time_from_gmt)
{
    // Every case below parses with from_gmt = true and the same format.
    auto parse_gmt = [](auto input) {
        return AK::UnixDateTime::parse("%Y-%m-%d %H:%M:%S"sv, input, true);
    };

    // An arbitrary GMT time only has to parse successfully.
    auto arbitrary = parse_gmt("2023-01-15 12:00:00"sv);
    VERIFY(arbitrary.has_value());

    // The epoch itself maps to timestamp 0.
    auto epoch = parse_gmt("1970-01-01 00:00:00"sv);
    VERIFY(epoch.has_value());
    EXPECT_EQ(epoch.value().seconds_since_epoch(), 0);

    // One hour after the epoch maps to 3600 seconds.
    auto one_hour_in = parse_gmt("1970-01-01 01:00:00"sv);
    VERIFY(one_hour_in.has_value());
    EXPECT_EQ(one_hour_in.value().seconds_since_epoch(), 3600);

    // A full date and time has to survive the round trip through gmtime_r().
    auto full = parse_gmt("2023-06-15 18:30:45"sv);
    VERIFY(full.has_value());

    auto spec = full.value().to_timespec();
    struct tm utc;
#ifdef AK_OS_WINDOWS
    VERIFY(gmtime_r(&spec.tv_sec, &utc) == 0);
#else
    VERIFY(gmtime_r(&spec.tv_sec, &utc) != nullptr);
#endif

    EXPECT_EQ(utc.tm_year + 1900, 2023);
    EXPECT_EQ(utc.tm_mon + 1, 6);
    EXPECT_EQ(utc.tm_mday, 15);
    EXPECT_EQ(utc.tm_hour, 18);
    EXPECT_EQ(utc.tm_min, 30);
    EXPECT_EQ(utc.tm_sec, 45);
}