diff --git a/integration_tests/test_str_attributes.py b/integration_tests/test_str_attributes.py index b9e886880b..b211014a69 100755 --- a/integration_tests/test_str_attributes.py +++ b/integration_tests/test_str_attributes.py @@ -354,6 +354,42 @@ def is_title(): assert " ".istitle() == False def is_space(): + s0: str = "" + assert s0.isspace() == False + assert "".isspace() == False + + s1: str = " \t\n\v\f\r" + assert s1.isspace() == True + assert " \t\n\v\f\r".isspace() == True + + s2: str = " \t\n\v\f\rabcd" + assert s2.isspace() == False + assert " \t\n\v\f\rabcd".isspace() == False + + s3: str = "abcd \t\n\v\f\ref" + assert s3.isspace() == False + assert "abcd \t\n\v\f\ref".isspace() == False + + s4: str = " \\t\n\v\f\r" + assert s4.isspace() == False + assert " \\t\n\v\f\r".isspace() == False + + s5: str = " \\t\\n\\v\\f\\r" + assert s5.isspace() == False + assert " \\t\\n\\v\\f\\r".isspace() == False + + s6: str = "Hello, LPython!\n" + assert s6.isspace() == False + assert "Hello, LPython!\n".isspace() == False + + s7: str = "\t\tHello! \n" + assert s7.isspace() == False + assert "\t\tHello! 
\n".isspace() == False + + s8: str = " \t \n \v \f \r " + assert s8.isspace() == True + assert " \t \n \v \f \r ".isspace() == True + assert "\n".isspace() == True assert " ".isspace() == True assert "\r".isspace() == True diff --git a/src/libasr/string_utils.cpp b/src/libasr/string_utils.cpp index b1b1e92aef..bd496d0899 100644 --- a/src/libasr/string_utils.cpp +++ b/src/libasr/string_utils.cpp @@ -192,6 +192,9 @@ char* str_unescape_c(Allocator &al, LCompilers::Str &s) { } else if (s[idx] == '\\' && s[idx+1] == 'v') { x += "\v"; idx++; + } else if (s[idx] == '\\' && s[idx + 1] == 'f') { + x += "\f"; + idx++; } else if (s[idx] == '\\' && s[idx+1] == '\\') { x += "\\"; idx++; diff --git a/src/runtime/lpython_builtin.py b/src/runtime/lpython_builtin.py index c7befb42a4..404834ba80 100644 --- a/src/runtime/lpython_builtin.py +++ b/src/runtime/lpython_builtin.py @@ -805,8 +805,6 @@ def _lpython_str_istitle(s: str) -> bool: return True if not only_whitespace else False - - @overload def _lpython_str_find(s: str, sub: str) -> i32: s_len :i32; sub_len :i32; flag: bool; _len: i32; @@ -1051,16 +1049,42 @@ def _lpython_str_isascii(s: str) -> bool: return False return True -def _lpython_str_isspace(s:str) -> bool: +def _lpython_str_isspace(s: str) -> bool: + # A Unicode character is considered a 'whitespace' if it has a bidirectional + # type 'WS', 'B' or 'S'; or the category 'Zs'. 
if len(s) == 0: return False - ch: str + + ch: str for ch in s: - if ch != ' ' and ch != '\t' and ch != '\n' and ch != '\r' and ch != '\f' and ch != '\v': + if not (ch == " " or # SPACE + ch == "\n" or # LINE FEED (LF) + ch == "\r" or # CARRIAGE RETURN (CR) + ch == "\t" or # CHARACTER TABULATION (HT) + ch == "\v" or # VERTICAL TAB (VT) + ch == "\f" or # FORM FEED (FF) + ch == "\u00A0" or # NO-BREAK SPACE + ch == "\u1680" or # OGHAM SPACE MARK + ch == "\u2000" or # EN QUAD + ch == "\u2001" or # EM QUAD + ch == "\u2002" or # EN SPACE + ch == "\u2003" or # EM SPACE + ch == "\u2004" or # THREE-PER-EM SPACE + ch == "\u2005" or # FOUR-PER-EM SPACE + ch == "\u2006" or # SIX-PER-EM SPACE + ch == "\u2007" or # FIGURE SPACE + ch == "\u2008" or # PUNCTUATION SPACE + ch == "\u2009" or # THIN SPACE + ch == "\u200A" or # HAIR SPACE + ch == "\u2028" or # LINE SEPARATOR + ch == "\u2029" or # PARAGRAPH SEPARATOR + ch == "\u202F" or # NARROW NO-BREAK SPACE + ch == "\u205F" or # MEDIUM MATHEMATICAL SPACE + ch == "\u3000" # IDEOGRAPHIC SPACE + ): return False return True - def list(s: str) -> list[str]: l: list[str] = [] i: i32 @@ -1069,3 +1093,4 @@ def list(s: str) -> list[str]: for i in range(len(s)): l.append(s[i]) return l +