Pure Lua implementation of md5 - hash

Is there a pure lua implementation of the md5 hashing algorithm? One that doesn't rely on any c or external libraries? There's javascript implementations that don't rely on c or anything, so it ought to be possible with lua.
Thanks!

I combined the mentioned lua MD5 library that required bitlib and added in LuaBit to make it a pure lua implementation.
As an additional benefit it's structured in such a way that it will work inside of the redis lua scripting environment.
Please note that it is extremely slow compared to other non pure lua based implementations.
--[[---------------
LuaBit v0.4
-------------------
a bitwise operation lib for lua.
http://luaforge.net/projects/bit/
How to use:
-------------------
bit.bnot(n) -- bitwise not (~n)
bit.band(m, n) -- bitwise and (m & n)
bit.bor(m, n) -- bitwise or (m | n)
bit.bxor(m, n) -- bitwise xor (m ^ n)
bit.brshift(n, bits) -- right shift (n >> bits)
bit.blshift(n, bits) -- left shift (n << bits)
bit.blogic_rshift(n, bits) -- logic right shift(zero fill >>>)
Please note that bit.brshift and bit.blshift only support number within
32 bits.
2 utility functions are provided too:
bit.tobits(n) -- convert n into a bit table(which is a 1/0 sequence)
-- high bits first
bit.tonumb(bit_tbl) -- convert a bit table into a number
-------------------
Under the MIT license.
copyright(c) 2006~2007 hanzhao (abrash_han#hotmail.com)
--]]---------------
--do
------------------------
-- bit lib implementions
local function check_int(n)
-- checking not float
if(n - math.floor(n) > 0) then
error("trying to use bitwise operation on non-integer!")
end
end
local function tbl_to_number(tbl)
local n = #tbl
local rslt = 0
local power = 1
for i = 1, n do
rslt = rslt + tbl[i]*power
power = power*2
end
return rslt
end
local function expand(tbl_m, tbl_n)
local big = {}
local small = {}
if(#tbl_m > #tbl_n) then
big = tbl_m
small = tbl_n
else
big = tbl_n
small = tbl_m
end
-- expand small
for i = #small + 1, #big do
small[i] = 0
end
end
local to_bits = function () end
local function bit_not(n)
local tbl = to_bits(n)
local size = math.max(#tbl, 32)
for i = 1, size do
if(tbl[i] == 1) then
tbl[i] = 0
else
tbl[i] = 1
end
end
return tbl_to_number(tbl)
end
to_bits = function (n)
check_int(n)
if(n < 0) then
-- negative
return to_bits(bit_not(math.abs(n)) + 1)
end
-- to bits table
local tbl = {}
local cnt = 1
while (n > 0) do
local last = math.mod(n,2)
if(last == 1) then
tbl[cnt] = 1
else
tbl[cnt] = 0
end
n = (n-last)/2
cnt = cnt + 1
end
return tbl
end
local function bit_or(m, n)
local tbl_m = to_bits(m)
local tbl_n = to_bits(n)
expand(tbl_m, tbl_n)
local tbl = {}
local rslt = math.max(#tbl_m, #tbl_n)
for i = 1, rslt do
if(tbl_m[i]== 0 and tbl_n[i] == 0) then
tbl[i] = 0
else
tbl[i] = 1
end
end
return tbl_to_number(tbl)
end
local function bit_and(m, n)
local tbl_m = to_bits(m)
local tbl_n = to_bits(n)
expand(tbl_m, tbl_n)
local tbl = {}
local rslt = math.max(#tbl_m, #tbl_n)
for i = 1, rslt do
if(tbl_m[i]== 0 or tbl_n[i] == 0) then
tbl[i] = 0
else
tbl[i] = 1
end
end
return tbl_to_number(tbl)
end
local function bit_xor(m, n)
local tbl_m = to_bits(m)
local tbl_n = to_bits(n)
expand(tbl_m, tbl_n)
local tbl = {}
local rslt = math.max(#tbl_m, #tbl_n)
for i = 1, rslt do
if(tbl_m[i] ~= tbl_n[i]) then
tbl[i] = 1
else
tbl[i] = 0
end
end
--table.foreach(tbl, print)
return tbl_to_number(tbl)
end
local function bit_rshift(n, bits)
check_int(n)
local high_bit = 0
if(n < 0) then
-- negative
n = bit_not(math.abs(n)) + 1
high_bit = 2147483648 -- 0x80000000
end
for i=1, bits do
n = n/2
n = bit_or(math.floor(n), high_bit)
end
return math.floor(n)
end
-- logic rightshift assures zero filling shift
local function bit_logic_rshift(n, bits)
check_int(n)
if(n < 0) then
-- negative
n = bit_not(math.abs(n)) + 1
end
for i=1, bits do
n = n/2
end
return math.floor(n)
end
local function bit_lshift(n, bits)
check_int(n)
if(n < 0) then
-- negative
n = bit_not(math.abs(n)) + 1
end
for i=1, bits do
n = n*2
end
return bit_and(n, 4294967295) -- 0xFFFFFFFF
end
local function bit_xor2(m, n)
local rhs = bit_or(bit_not(m), bit_not(n))
local lhs = bit_or(m, n)
local rslt = bit_and(lhs, rhs)
return rslt
end
-- An MD5 mplementation in Lua, requires bitlib (hacked to use LuaBit from above, ugh)
-- 10/02/2001 jcw#equi4.com
local md5={ff=tonumber('ffffffff',16),consts={}}
string.gsub([[ d76aa478 e8c7b756 242070db c1bdceee
f57c0faf 4787c62a a8304613 fd469501
698098d8 8b44f7af ffff5bb1 895cd7be
6b901122 fd987193 a679438e 49b40821
f61e2562 c040b340 265e5a51 e9b6c7aa
d62f105d 02441453 d8a1e681 e7d3fbc8
21e1cde6 c33707d6 f4d50d87 455a14ed
a9e3e905 fcefa3f8 676f02d9 8d2a4c8a
fffa3942 8771f681 6d9d6122 fde5380c
a4beea44 4bdecfa9 f6bb4b60 bebfbc70
289b7ec6 eaa127fa d4ef3085 04881d05
d9d4d039 e6db99e5 1fa27cf8 c4ac5665
f4292244 432aff97 ab9423a7 fc93a039
655b59c3 8f0ccc92 ffeff47d 85845dd1
6fa87e4f fe2ce6e0 a3014314 4e0811a1
f7537e82 bd3af235 2ad7d2bb eb86d391
67452301 efcdab89 98badcfe 10325476 ]],"(%w+)", function (s) table.insert(md5.consts, tonumber(s,16)) end)
--67452301 efcdab89 98badcfe 10325476 ]],"(%w+)", function (s) tinsert(md5.consts,tonumber(s,16)) end)
function md5.transform(A,B,C,D,X)
local f=function (x,y,z) return bit_or(bit_and(x,y),bit_and(-x-1,z)) end
local g=function (x,y,z) return bit_or(bit_and(x,z),bit_and(y,-z-1)) end
local h=function (x,y,z) return bit_xor(x,bit_xor(y,z)) end
local i=function (x,y,z) return bit_xor(y,bit_or(x,-z-1)) end
local z=function (f,a,b,c,d,x,s,ac)
a=bit_and(a+f(b,c,d)+x+ac,md5.ff)
-- be *very* careful that left shift does not cause rounding!
return bit_or(bit_lshift(bit_and(a,bit_rshift(md5.ff,s)),s),bit_rshift(a,32-s))+b
end
local a,b,c,d=A,B,C,D
local t=md5.consts
a=z(f,a,b,c,d,X[ 0], 7,t[ 1])
d=z(f,d,a,b,c,X[ 1],12,t[ 2])
c=z(f,c,d,a,b,X[ 2],17,t[ 3])
b=z(f,b,c,d,a,X[ 3],22,t[ 4])
a=z(f,a,b,c,d,X[ 4], 7,t[ 5])
d=z(f,d,a,b,c,X[ 5],12,t[ 6])
c=z(f,c,d,a,b,X[ 6],17,t[ 7])
b=z(f,b,c,d,a,X[ 7],22,t[ 8])
a=z(f,a,b,c,d,X[ 8], 7,t[ 9])
d=z(f,d,a,b,c,X[ 9],12,t[10])
c=z(f,c,d,a,b,X[10],17,t[11])
b=z(f,b,c,d,a,X[11],22,t[12])
a=z(f,a,b,c,d,X[12], 7,t[13])
d=z(f,d,a,b,c,X[13],12,t[14])
c=z(f,c,d,a,b,X[14],17,t[15])
b=z(f,b,c,d,a,X[15],22,t[16])
a=z(g,a,b,c,d,X[ 1], 5,t[17])
d=z(g,d,a,b,c,X[ 6], 9,t[18])
c=z(g,c,d,a,b,X[11],14,t[19])
b=z(g,b,c,d,a,X[ 0],20,t[20])
a=z(g,a,b,c,d,X[ 5], 5,t[21])
d=z(g,d,a,b,c,X[10], 9,t[22])
c=z(g,c,d,a,b,X[15],14,t[23])
b=z(g,b,c,d,a,X[ 4],20,t[24])
a=z(g,a,b,c,d,X[ 9], 5,t[25])
d=z(g,d,a,b,c,X[14], 9,t[26])
c=z(g,c,d,a,b,X[ 3],14,t[27])
b=z(g,b,c,d,a,X[ 8],20,t[28])
a=z(g,a,b,c,d,X[13], 5,t[29])
d=z(g,d,a,b,c,X[ 2], 9,t[30])
c=z(g,c,d,a,b,X[ 7],14,t[31])
b=z(g,b,c,d,a,X[12],20,t[32])
a=z(h,a,b,c,d,X[ 5], 4,t[33])
d=z(h,d,a,b,c,X[ 8],11,t[34])
c=z(h,c,d,a,b,X[11],16,t[35])
b=z(h,b,c,d,a,X[14],23,t[36])
a=z(h,a,b,c,d,X[ 1], 4,t[37])
d=z(h,d,a,b,c,X[ 4],11,t[38])
c=z(h,c,d,a,b,X[ 7],16,t[39])
b=z(h,b,c,d,a,X[10],23,t[40])
a=z(h,a,b,c,d,X[13], 4,t[41])
d=z(h,d,a,b,c,X[ 0],11,t[42])
c=z(h,c,d,a,b,X[ 3],16,t[43])
b=z(h,b,c,d,a,X[ 6],23,t[44])
a=z(h,a,b,c,d,X[ 9], 4,t[45])
d=z(h,d,a,b,c,X[12],11,t[46])
c=z(h,c,d,a,b,X[15],16,t[47])
b=z(h,b,c,d,a,X[ 2],23,t[48])
a=z(i,a,b,c,d,X[ 0], 6,t[49])
d=z(i,d,a,b,c,X[ 7],10,t[50])
c=z(i,c,d,a,b,X[14],15,t[51])
b=z(i,b,c,d,a,X[ 5],21,t[52])
a=z(i,a,b,c,d,X[12], 6,t[53])
d=z(i,d,a,b,c,X[ 3],10,t[54])
c=z(i,c,d,a,b,X[10],15,t[55])
b=z(i,b,c,d,a,X[ 1],21,t[56])
a=z(i,a,b,c,d,X[ 8], 6,t[57])
d=z(i,d,a,b,c,X[15],10,t[58])
c=z(i,c,d,a,b,X[ 6],15,t[59])
b=z(i,b,c,d,a,X[13],21,t[60])
a=z(i,a,b,c,d,X[ 4], 6,t[61])
d=z(i,d,a,b,c,X[11],10,t[62])
c=z(i,c,d,a,b,X[ 2],15,t[63])
b=z(i,b,c,d,a,X[ 9],21,t[64])
return A+a,B+b,C+c,D+d
end
-- convert little-endian 32-bit int to a 4-char string
local function leIstr(i)
local f=function (s) return string.char(bit_and(bit_rshift(i,s),255)) end
return f(0)..f(8)..f(16)..f(24)
end
-- convert raw string to big-endian int
local function beInt(s)
local v=0
for i=1,string.len(s) do v=v*256+string.byte(s,i) end
return v
end
-- convert raw string to little-endian int
local function leInt(s)
local v=0
for i=string.len(s),1,-1 do v=v*256+string.byte(s,i) end
return v
end
-- cut up a string in little-endian ints of given size
local function leStrCuts(s,...)
local o,r=1,{}
for i=1,#arg do
table.insert(r,leInt(string.sub(s,o,o+arg[i]-1)))
o=o+arg[i]
end
return r
end
function md5.Calc(s)
local msgLen=string.len(s)
local padLen=56- msgLen % 64
if msgLen % 64 > 56 then padLen=padLen+64 end
if padLen==0 then padLen=64 end
s=s..string.char(128)..string.rep(string.char(0),padLen-1)
s=s..leIstr(8*msgLen)..leIstr(0)
assert(string.len(s) % 64 ==0)
local t=md5.consts
local a,b,c,d=t[65],t[66],t[67],t[68]
for i=1,string.len(s),64 do
local X=leStrCuts(string.sub(s,i,i+63),4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4)
assert(#X==16)
X[0]=table.remove(X,1) -- zero based!
a,b,c,d=md5.transform(a,b,c,d,X)
end
local swap=function (w) return beInt(leIstr(w)) end
return string.format("%08x%08x%08x%08x",swap(a),swap(b),swap(c),swap(d))
end
return md5.Calc("asdf"); -- 912ec803b2ce49e4a541068d495ab570
It is available in this gist.

http://equi4.com/md5/md5calc.lua but that still requires a lib.

There's an old one here but it requires bitlib.

I've refined #Adam Baldwin's solution and made a library that calculates md5 sums in pure Lua, with no external dependencies and no C:
https://github.com/kikito/md5.lua
Summary of changes:
Implemented two functions on the interface, md5.sum and md5.sumhex, that work exactly like their counterparts in the Kepler library, but are implemented in Lua alone.
Added a small test suite with using busted.
Removed some unused functions
Avoided re-creation of anonymous functions when it was not needed
Simplified the creation of the constants, and made them private
Spacing and naming changes.
For now I don't need md5.crypt and md5.decrypt, so I have not implemented those. But I will accept pull requests :)

Related

Unpacking a vector into an array of a certain bit width

Suppose I have a vector of bits. I'd like to convert it into an array of n bit values, where n is a variable (not a parameter). Can I achieve this using the streaming operators? I tried this (right now I'm just trying a value of 3, but eventually '3' should be variable):
module tb;
bit [51:0] vector = 'b111_110_101_100_011_010_001_000;
byte vector_byte[];
initial begin
$displayb(vector);
vector_byte = {<<3{vector}};
foreach(vector_byte[i])
$display("%0d = %0b", i, vector_byte[i]);
end
endmodule
What I was expecting was:
vector_byte = '{'b000, 'b001, 'b010 ... 'b111};
However, the output I got was:
# vsim -voptargs=+acc=npr
# run -all
# 00000000000000000000000000000000111110101100011010001000
# 0 = 101
# 1 = 111001
# 2 = 1110111
# 3 = 0
# 4 = 0
# 5 = 0
# 6 = 0
# exit
Am I just using the streaming operators wrong?
The streaming operators only work with contiguous streams. You need 5'b00000 inserted into each byte.
module tb;
bit [51:0] vector = 'b111_110_101_100_011_010_001_000;
int W = 3;
byte vector_byte[];
initial begin
vector_byte = new[$bits(vector)/3];
$displayb(vector);
foreach(vector_byte[i]) begin
vector_byte[i] = vector[i*W+:8] & (1<<W)-1; // mask W is in range 1-8
$display("%0d = %0b", i, vector_byte[i]);
end
end
endmodule

Reduce length of the pseudo-encrypt function ouput

i have a question about the pseudo-encrypt function for postgres.
Is there any way that I can reduce the output to 6? I really like this function and want to use it, but only need a output between 1 and 999999.
This question is related to my last question. I want to use it to created unqiue numbers between 1 and 999999.
Thank you.
Use mod on the generated value to generate number in range from start_value to end_value:
select start_value + mod(pseudo_encrypt(number), end_value - start_value + 1);
For your case this will be look like:
select 1 + mod(pseudo_encrypt(23452), 999999);
It's not quite straightforward to set an upper bound of 999999, as the algorithm operates on blocks of bits, so it's hard to get away from powers of two.
You can work around this by cycle walking - just try encrypt(n), encrypt(encrypt(n)), encrypt(encrypt(encrypt(n)))... until you end up with a result in the range [1,999999]. In the interest of keeping the number of iterations to a minimum, you want to adjust the block size to get you as close to this range as possible.
This version will let you specify a range for the input/output:
CREATE OR REPLACE FUNCTION pseudo_encrypt(
value INT8,
min INT8 DEFAULT 0,
max INT8 DEFAULT (2^62::NUMERIC)-1
) RETURNS INT8 AS
$$
DECLARE
rounds CONSTANT INT = 3;
L INT8[];
R INT8[];
i INT;
blocksize INT;
blockmask INT8;
result INT8;
BEGIN
max = max - min;
value = value - min;
IF NOT ((value BETWEEN 0 AND max) AND (max BETWEEN 0 AND 2^62::NUMERIC-1)) THEN
RAISE 'Input out of range';
END IF;
blocksize = ceil(char_length(ltrim(max::BIT(64)::TEXT,'0'))/2.0);
blockmask = (2^blocksize::NUMERIC-1)::INT8;
result = value;
LOOP
L[1] = (result >> blocksize) & blockmask;
R[1] = result & blockmask;
FOR i IN 1..rounds LOOP
L[i+1] = R[i];
R[i+1] = L[i] # ((941083981*R[i] + 768614336404564651) & blockmask);
END LOOP;
result = (L[rounds]::INT8 << blocksize) | R[rounds];
IF result <= max THEN
RETURN result + min;
END IF;
END LOOP;
END;
$$
LANGUAGE plpgsql STRICT IMMUTABLE;
I can't guarantee its correctness, but you can easily show that it maps [1,999999] back to [1,999999]:
SELECT i FROM generate_series(1,999999) s(i)
EXCEPT
SELECT pseudo_encrypt(i,1,999999) FROM generate_series(1,999999) s(i)

Got "Boolean" expected "LongInt" pascal

I get this error on my insertion sort algorithm:
insertionsort.lpr(19,17) Error: Incompatible types: got "Boolean" expected "LongInt"
Here's the line 19 of my code
while j > 0 and A[j]>key do
I have tried googling all over the internet but i couldn't find any syntax errors or anything.
Here's the full code if it helps :
program instert;
uses crt;
const
N = 5;
var
i:integer;
j:integer;
key:integer;
A : Array[1..N] of Integer;
procedure insertionsort;
begin
for i := 2 to N do
begin
key := A[1];
j:= i - 1;
while j > 0 and A[j]>key do
begin
A[j+1] := A[j] ;
j := j-1;
end;
A[j+1] := key ;
end;
end;
begin
A[1]:= 9;
A[2]:= 6;
A[3]:= 7;
A[4]:= 1;
A[5]:= 2;
insertionsort;
end.
I also get the same error on the bubble sort algorithm i did. Here's the error line
bubblesort.lpr(26,14) Error: Incompatible types: got "Boolean" expected "LongInt"
Here's line 26 of my algorithm:
until flag = false or N = 1 ;
Here's the full code:
program bubblesort;
uses crt;
var
flag:boolean;
count:integer;
temp:integer;
N:integer;
A : Array[1..N] of Integer;
procedure bubblesort ;
begin
Repeat
flag:=false;
for count:=1 to (N-1) do
begin
if A[count] > A[count + 1] then
begin
temp := A[count];
A[count] := A[count + 1];
A[count] := temp;
flag := true;
end;
end;
N := N - 1;
until flag = false or N = 1 ;
end;
begin
A[1]:= 9;
A[2]:= 6;
A[3]:= 7;
A[4]:= 1;
A[5]:= 2;
N := 5;
bubblesort;
end.
In Pascal, boolean operators and and or have higher precedence than the comparison operators >, =, etc. So in the expression:
while j > 0 and A[j] > key do
Given that and has higher precedence, Pascal sees this as:
while (j > (0 and A[j])) > key do
0 and A[j] are evaluated as a bitwise and (since the arguments are integers) resulting in an integer. Then the comparison, j > (0 and A[j]) is evaluated as a boolean result, leaving a check of that with > key, which is boolean > longint. You then get the error that a longint is expected instead of the boolean for the arithmetic comparison.
The way to fix it is to parenthesize:
while (j > 0) and (A[j] > key) do ...
The same issue applies with this statement:
until flag = false or N = 1 ;
which yields an error because or is higher precedence than =. So you can parenthesize:
until (flag = false) or (N = 1);
Or, more canonical for booleans:
until not flag or (N = 1); // NOTE: 'not' is higher precedence than 'or'
When in doubt about the precedence of operators, parenthesizing is a good idea, as it removes the doubt about what order operations are going to occur.

Getting error from: dlen = uint32(0) ;

I don't know why but I am getting this error:
Error in mr_lsbpex (line 3)
dlen = uint32(0) ;
Output argument "a" (and maybe others) not assigned during call to "E:\path\mr_lsbpex.m>mr_lsbpex"
I have tested "dlen = uint32(0) ;" in matlab enviorment (outside of this function) and everything was OK. Here is my code:
function a = mr_lsbpex ( r, p )
% extract from an array
dlen = uint32(0) ;
s = size (r) ;
rnd = rand (s(1),s(2)) ;
rd = 32 ;
rl = s(2) ;
for i=1:s(2)
if rnd(1,i)<rd/rl
d = bitget (round(r(1,i)/p),1);
dlen = bitset (dlen,rd,d);
rd = rd -1 ;
end
rl = rl -1 ;
end
if (dlen > 10000000 )
clear a ;
return ;
end
a = uint8(zeros(dlen,1)) ;
rd = double(dlen * 8) ;
rl = double(s(1)*s(2)-s(2)) ;
for i=2:s(1)
for j=1:s(2)
if rnd(i,j)<rd/rl
d = bitget (round(r(i,j)/p) ,1) ;
a = z_set_bit (a,rd,d) ;
rd = rd - 1 ;
end
rl = rl - 1 ;
end
end
Remember: a needs to be returned ALLWAYS!
The error is not in that specific line, but in the "whole" function itself.
Your problem is that Matlab thinks that a its not going to be created. And actually in some case it may not be created.
The following line in the beginning of your function should do the trick
a=0; % well, or a=NaN; or whatever you want to return
Additionally, don't clear a in if (dlen > 10000000 ).

How to write a unicode symbol in lua

How can I write a Unicode symbol in lua. For example I have to write symbol with 9658
when I write
string.char( 9658 );
I got an error. So how is it possible to write such a symbol.
Lua does not look inside strings. So, you can just write
mychar = "►"
(added in 2015)
Lua 5.3 introduced support for UTF-8 escape sequences:
The UTF-8 encoding of a Unicode character can be inserted in a literal string with the escape sequence \u{XXX} (note the mandatory enclosing brackets), where XXX is a sequence of one or more hexadecimal digits representing the character code point.
You can also use utf8.char(9658).
Here is an encoder for Lua that takes a Unicode code point and produces a UTF-8 string for the corresponding character:
do
local bytemarkers = { {0x7FF,192}, {0xFFFF,224}, {0x1FFFFF,240} }
function utf8(decimal)
if decimal<128 then return string.char(decimal) end
local charbytes = {}
for bytes,vals in ipairs(bytemarkers) do
if decimal<=vals[1] then
for b=bytes+1,2,-1 do
local mod = decimal%64
decimal = (decimal-mod)/64
charbytes[b] = string.char(128+mod)
end
charbytes[1] = string.char(vals[2]+decimal)
break
end
end
return table.concat(charbytes)
end
end
c=utf8(0x24) print(c.." is "..#c.." bytes.") --> $ is 1 bytes.
c=utf8(0xA2) print(c.." is "..#c.." bytes.") --> ¢ is 2 bytes.
c=utf8(0x20AC) print(c.." is "..#c.." bytes.") --> € is 3 bytes.
c=utf8(0x24B62) print(c.." is "..#c.." bytes.") --> 𤭢 is 4 bytes.
Maybe this can help you:
function FromUTF8(pos)
local mod = math.mod
local function charat(p)
local v = editor.CharAt[p]; if v < 0 then v = v + 256 end; return v
end
local v, c, n = 0, charat(pos), 1
if c < 128 then v = c
elseif c < 192 then
error("Byte values between 0x80 to 0xBF cannot start a multibyte sequence")
elseif c < 224 then v = mod(c, 32); n = 2
elseif c < 240 then v = mod(c, 16); n = 3
elseif c < 248 then v = mod(c, 8); n = 4
elseif c < 252 then v = mod(c, 4); n = 5
elseif c < 254 then v = mod(c, 2); n = 6
else
error("Byte values between 0xFE and OxFF cannot start a multibyte sequence")
end
for i = 2, n do
pos = pos + 1; c = charat(pos)
if c < 128 or c > 191 then
error("Following bytes must have values between 0x80 and 0xBF")
end
v = v * 64 + mod(c, 64)
end
return v, pos, n
end
To get broader support for Unicode string content, one approach is slnunicode which was developed as part of the Selene database library. It will give you a module that supports most of what the standard string library does, but with Unicode characters and UTF-8 encoding.