diff --git a/cheuph/__init__.py b/cheuph/__init__.py
index c63ff02..fbc12e0 100644
--- a/cheuph/__init__.py
+++ b/cheuph/__init__.py
@@ -10,6 +10,7 @@ from .element_supply import *
 from .exceptions import *
 from .markup import *
 from .rendered_element_cache import *
+from .utils import *
 
 __all__: List[str] = []
 
@@ -23,3 +24,4 @@ __all__ += element_supply.__all__
 __all__ += exceptions.__all__
 __all__ += markup.__all__
 __all__ += rendered_element_cache.__all__
+__all__ += utils.__all__
diff --git a/cheuph/attributed_lines.py b/cheuph/attributed_lines.py
index bf55fa9..970ecab 100644
--- a/cheuph/attributed_lines.py
+++ b/cheuph/attributed_lines.py
@@ -1,5 +1,6 @@
 # TODO retrieve attributes of any (x, y) coordinates
 # TODO retrieve attributes of closest existing line (by y coordinate)
+# TODO use ulen and unicode string splitting
 
 import collections
 from typing import Any, Deque, Iterator, List, Optional, Set, Tuple
diff --git a/cheuph/utils.py b/cheuph/utils.py
new file mode 100644
index 0000000..c17cbc1
--- /dev/null
+++ b/cheuph/utils.py
@@ -0,0 +1,32 @@
+import unicodedata
+
+__all__ = ["ulen"]
+
+# See http://www.unicode.org/reports/tr11/#ED7
+#
+# "In a broad sense, wide characters include W, F, and A (when in East Asian
+# context), and narrow characters include N, Na, H, and A (when not in East
+# Asian context)."
+_WIDE = {"W", "F", "A"}  # wide categories in an East Asian context
+_NARROW = {"N", "Na", "H", "A"}  # narrow categories outside of one
+
+def ulen(string: str, east_asian_context: bool = False) -> int:
+    """Return the number of terminal columns needed to display *string*.
+
+    Every character counts as 1 (narrow) or 2 (wide) columns, depending on
+    its Unicode East Asian Width category.  Ambiguous ("A") characters are
+    wide only if *east_asian_context* is true.  Zero-width characters
+    (e.g. combining marks) are not special-cased.
+    """
+    length = 0
+    for char in string:
+        # Look up the width category of the character; the raw character
+        # itself is not a category name and must not be tested directly.
+        category = unicodedata.east_asian_width(char)
+        if east_asian_context:
+            length += 2 if category in _WIDE else 1
+        else:
+            length += 1 if category in _NARROW else 2
+    return length
+
+# TODO unicode string splitting based on the same principle as above