Add unicode.awk POC file for unicode characters.
diff --git a/texindex/unicode.awk b/texindex/unicode.awk
new file mode 100644
index 0000000..c03baaa
--- /dev/null
+++ b/texindex/unicode.awk
@@ -0,0 +1,20 @@
+BEGIN {
+ printf("0x10FFFF is %d\n", 0x10FFFF)
+ printf("0xFFFD is %d\n", 0xFFFD)
+ printf("0xFFFD is '%c'\n", 0xFFFD)
+ invalid = sprintf("%c\n", 0xFFFD)
+
+ count = 0
+ for (i = 0; i <= 0x10FFFF; i++) {
+ char = sprintf("%c", i)
+ if (isvalid(char))
+ count++
+ }
+
+ printf("%d valid characters between 1 and %d\n", count, 0x10FFFF)
+}
+
+function isvalid(c)
+{
+ return c ~ /[[:alpha:][:blank:][:cntrl:][:digit:][:graph:][:lower:][:print:][:punct:][:space:][:upper:]]/
+}