Windows C API for UTF8 to 1252 - unicode

I'm familiar with WideCharToMultiByte and MultiByteToWideChar conversions and could use these to do something like:
UTF8 -> UTF16 -> 1252
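For reference, that two-step route would look roughly like this (an untested sketch; error handling is omitted and the fixed-size buffer is only for illustration):
#include <windows.h>

/* Hypothetical helper: UTF-8 -> UTF-16 -> Windows-1252 in two API calls. */
int utf8_to_1252(const char *utf8, char *out, int out_size)
{
    wchar_t wide[1024];

    if (MultiByteToWideChar(CP_UTF8, 0, utf8, -1, wide, 1024) == 0)
        return 0;
    /* 1252 is the Windows-1252 code page identifier. */
    return WideCharToMultiByte(1252, 0, wide, -1, out, out_size, NULL, NULL);
}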
I know that iconv will do what I need, but does anybody know of any MS libs that will allow this in a single call?
I should probably just pull in the iconv library, but am feeling lazy.
Thanks

Windows 1252 is mostly equivalent to latin-1, aka ISO-8859-1: Windows-1252 just has some additional characters allocated in the latin-1 reserved range 128-159. If you are ready to ignore those extra characters, and stick to latin-1, then conversion is rather easy. Try this:
#include <stddef.h>

/*
 * Convert from UTF-8 to latin-1. Invalid encodings, and encodings of
 * code points beyond 255, are replaced by question marks. No more than
 * dst_max_len bytes are stored in the destination array. Returned value
 * is the length that the latin-1 string would have had, assuming a big
 * enough destination buffer.
 */
size_t
utf8_to_latin1(char *src, size_t src_len,
    char *dst, size_t dst_max_len)
{
    unsigned char *sb;
    size_t u, v;

    u = v = 0;
    sb = (unsigned char *)src;
    while (u < src_len) {
        int c = sb[u ++];

        if (c >= 0x80) {
            if (c >= 0xC0 && c < 0xE0) {
                /* lead byte of a two-byte sequence */
                if (u == src_len) {
                    c = '?';
                } else {
                    int w = sb[u];

                    if (w >= 0x80 && w < 0xC0) {
                        u ++;
                        c = ((c & 0x1F) << 6)
                            + (w & 0x3F);
                        /* two-byte sequences can encode up to 2047 */
                        if (c > 0xFF)
                            c = '?';
                    } else {
                        c = '?';
                    }
                }
            } else {
                /*
                 * Lead byte of a longer sequence (code point beyond
                 * 255), a stray continuation byte, or an invalid
                 * byte: emit '?' and skip any continuation bytes
                 * that follow.
                 */
                c = '?';
                while (u < src_len && sb[u] >= 0x80 && sb[u] < 0xC0)
                    u ++;
            }
        }
        if (v < dst_max_len)
            dst[v] = (char)c;
        v ++;
    }
    return v;
}
/*
 * Convert from latin-1 to UTF-8. No more than dst_max_len bytes are
 * stored in the destination array. Returned value is the length that
 * the UTF-8 string would have had, assuming a big enough destination
 * buffer.
 */
size_t
latin1_to_utf8(char *src, size_t src_len,
    char *dst, size_t dst_max_len)
{
    unsigned char *sb;
    size_t u, v;

    u = v = 0;
    sb = (unsigned char *)src;
    while (u < src_len) {
        int c = sb[u ++];

        if (c < 0x80) {
            if (v < dst_max_len)
                dst[v] = (char)c;
            v ++;
        } else {
            int h = 0xC0 + (c >> 6);
            int l = 0x80 + (c & 0x3F);

            if (v < dst_max_len) {
                dst[v] = (char)h;
                if ((v + 1) < dst_max_len)
                    dst[v + 1] = (char)l;
            }
            v += 2;
        }
    }
    return v;
}
Note that I make no guarantee about this code. This is completely untested.
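A hypothetical calling pattern for utf8_to_latin1 (assuming the functions above are in the same translation unit; the first call returns the required length, the second fills the buffer):
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
    char utf8[] = "caf\xC3\xA9";   /* "café" encoded as UTF-8 */
    size_t need = utf8_to_latin1(utf8, strlen(utf8), NULL, 0);
    char *latin1 = malloc(need + 1);

    if (latin1 != NULL) {
        utf8_to_latin1(utf8, strlen(utf8), latin1, need);
        latin1[need] = '\0';
        printf("latin-1 result is %zu bytes long\n", need);
        free(latin1);
    }
    return 0;
}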

Related

CUDA sha256 produces a different hash compared to OpenSSL

I am trying to port my SHA-256 hash function from CPU code to CUDA. After googling, I found a few working examples of CUDA SHA-256. However, when tested, the hash produced by the CUDA SHA-256 differs from the OpenSSL one.
My input is "hello world", which is declared as a const char*. The results are as below:
Constant Char* Input : hello world
Hash on CPU : b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9
Hash on GPU : c1114db6b517b4db8d360a9e14f5c2a57de95d955ec20cbd4cb73facb2b13e5f
I need help fixing my GPU code for SHA-256 so that it produces the same hash as the CPU (OpenSSL) version.
Here is my code for the CPU hash:
#pragma warning(disable : 4996) // disable the MSVC deprecation warning for the OpenSSL SHA_* API
#include <cstdio>
#include <cstring>
#include <iostream>
#include <openssl/sha.h>

unsigned char hash[SHA256_DIGEST_LENGTH];

void SHA256(const char* input, size_t input_size) {
    SHA256_CTX sha256;
    SHA256_Init(&sha256);
    SHA256_Update(&sha256, input, input_size);
    SHA256_Final(hash, &sha256);
}

void CPU() {
    const char* input = "hello world";
    size_t input_size = strlen(input);
    SHA256(input, input_size);
    for (size_t i = 0; i < 32; i++) {
        printf("%02x", hash[i]);
    }
    printf("\n");
}
And here is my code for the GPU hash:
#include <stdio.h>
#include <string.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#define BLOCK_SIZE 256
__constant__ unsigned int k[64] = {
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};
__device__ unsigned int Ch(unsigned int x, unsigned int y, unsigned int z) {
    return (x & y) ^ (~x & z);
}
__device__ unsigned int Maj(unsigned int x, unsigned int y, unsigned int z) {
    return (x & y) ^ (x & z) ^ (y & z);
}
__device__ unsigned int Sigma0(unsigned int x) {
    return (x >> 2u) | (x << 30u);
}
__device__ unsigned int Sigma1(unsigned int x) {
    return (x >> 6u) | (x << 26u);
}
__device__ unsigned int sigma0(unsigned int x) {
    return (x >> 7u) | (x << 25u);
}
__device__ unsigned int sigma1(unsigned int x) {
    return (x >> 17u) | (x << 15u);
}
//solve using 256 thread in 1 block
__global__ void sha256_kernel(const char* input, size_t input_size, unsigned char* output) {
    size_t i = blockIdx.x * blockDim.x + threadIdx.x;
    size_t grid_size = blockDim.x * gridDim.x;
    for (; i < input_size; i += grid_size) {
        unsigned int h[8] = {
            0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
        };
        unsigned int w[64];
        for (size_t j = 0; j < input_size; j += 64) {
            for (size_t t = 0; t < 16; t++) {
                w[t] = ((unsigned int)input[j + t * 4 + 0] << 24u) | ((unsigned int)input[j + t * 4 + 1] << 16u) |
                       ((unsigned int)input[j + t * 4 + 2] << 8u) | ((unsigned int)input[j + t * 4 + 3] << 0u);
            }
            for (size_t t = 16; t < 64; t++) {
                w[t] = sigma1(w[t - 2]) + w[t - 7] + sigma0(w[t - 15]) + w[t - 16];
            }
            unsigned int a = h[0];
            unsigned int b = h[1];
            unsigned int c = h[2];
            unsigned int d = h[3];
            unsigned int e = h[4];
            unsigned int f = h[5];
            unsigned int g = h[6];
            unsigned int hh = h[7];
            for (size_t t = 0; t < 64; t++) {
                unsigned int t1 = hh + Sigma1(e) + Ch(e, f, g) + k[t] + w[t];
                unsigned int t2 = Sigma0(a) + Maj(a, b, c);
                hh = g;
                g = f;
                f = e;
                e = d + t1;
                d = c;
                c = b;
                b = a;
                a = t1 + t2;
            }
            h[0] += a;
            h[1] += b;
            h[2] += c;
            h[3] += d;
            h[4] += e;
            h[5] += f;
            h[6] += g;
            h[7] += hh;
        }
        for (size_t t = 0; t < 8; t++) {
            output[i + t * 4 + 0] = (unsigned char)(h[t] >> 24u);
            output[i + t * 4 + 1] = (unsigned char)(h[t] >> 16u);
            output[i + t * 4 + 2] = (unsigned char)(h[t] >> 8u);
            output[i + t * 4 + 3] = (unsigned char)(h[t] >> 0u);
        }
    }
}
void GPU() {
    const char* input = "hello world";
    size_t input_size = strlen(input);
    size_t output_size = 32;
    unsigned char* output;
    char* input_device;
    cudaMalloc((void**)&output, output_size);
    cudaMalloc((void**)&input_device, input_size);
    cudaMemcpy(input_device, input, input_size, cudaMemcpyHostToDevice);
    //solve using 256 thread and 1 block
    sha256_kernel<<<((input_size + BLOCK_SIZE - 1) / BLOCK_SIZE), BLOCK_SIZE>>>(input_device, input_size, output);
    unsigned char* output_host = (unsigned char*)malloc(output_size);
    cudaMemcpy(output_host, output, output_size, cudaMemcpyDeviceToHost);
    for (size_t i = 0; i < output_size; i++) {
        printf("%02x", output_host[i]);
    }
    printf("\n");
    free(output_host);
    cudaFree(output);
    cudaFree(input_device);
}
Thanks in advance.
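For reference, FIPS 180-4 defines each of the four SHA-256 mixing functions as an XOR of three rotated or shifted terms; the device functions above keep only the first rotation of each. A plain C sketch of the standard definitions (untested here; names chosen to mirror the kernel, and they would need the __device__ qualifier in CUDA):
/* rotate a 32-bit word right by n bits */
static unsigned int rotr(unsigned int x, unsigned int n) {
    return (x >> n) | (x << (32u - n));
}
static unsigned int Sigma0(unsigned int x) { return rotr(x, 2) ^ rotr(x, 13) ^ rotr(x, 22); }
static unsigned int Sigma1(unsigned int x) { return rotr(x, 6) ^ rotr(x, 11) ^ rotr(x, 25); }
static unsigned int sigma0(unsigned int x) { return rotr(x, 7) ^ rotr(x, 18) ^ (x >> 3u); }
static unsigned int sigma1(unsigned int x) { return rotr(x, 17) ^ rotr(x, 19) ^ (x >> 10u); }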

Unicode escape in C

I'm using cURL to retrieve a JSON string from Wikipedia, e.g.
https://en.wikipedia.org/w/api.php?action=opensearch&limit=1&format=json&search=syntax
I don't use a JSON parser, and I am looking for a way to remove the Unicode escapes, which are useless for me.
I have already tried &utf8= in the URL, as well as WideCharToMultiByte and wcstombs.
I just want to convert this string to ANSI format to be used in another application, even if I lose data.
Here is the code I am using to convert the string:
#include <stdlib.h>   /* strtol */
#include <string.h>   /* strncpy */

void UnicodeToAnsi(char *str, char *str2)
{
    unsigned char ch;
    char *pr = str;
    char *pw = str2;

    while ( (*pr) != 0 )
    {
        ch = (*pr);
        if ( ch == '\\' )
        {
            if ( *(pr+1) == 'u')
            {
                char szANSIString [2] = {'\0'};
                wchar_t wcsString[2] = {0,'\0'};
                char h[5]={'\0'};
                int v;

                strncpy(h,pr + 2,4);
                v = (int)strtol(h, NULL, 16);
#if 0
                wcsString[0] = v;
#ifdef _WIN32
                WideCharToMultiByte ( CP_ACP,               // ANSI code page
                                      WC_COMPOSITECHECK,    // Check for accented characters
                                      wcsString,            // Source Unicode string
                                      -1,                   // -1 means string is zero-terminated
                                      szANSIString,         // Destination char string
                                      sizeof(szANSIString), // Size of buffer
                                      NULL,                 // No default character
                                      NULL );               // Don't care about this flag
#else
                wcstombs(szANSIString, wcsString, sizeof(szANSIString));
#endif
                ch = *szANSIString;
                if (ch == '\0') ch = '?';
#endif
                // bored with this unicode, easy way
                ch = '-';
                if (v == 232) ch = 138;
                if (v == 233) ch = 130;
                if (v == 234) ch = 136;
                if (v == 224) ch = 133;
                if (v == 225) ch = 'a';
                if (v == 226) ch = 'a';
                if (v == 257) ch = 'a';
                if (v == 231) ch = 135;
                pr = pr + 5;
            }
        }
        (*pw) = ch;
        ++pw;
        ++pr;
    }
    *pw = '\0';
    return;
}
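A hypothetical call (the output buffer can be the same size as the input, since the function never writes more characters than it reads):
#include <stdio.h>

int main(void)
{
    char in[]  = "canap\\u00e9 and caf\\u00e9";   /* sample text with JSON escapes */
    char out[sizeof in];

    UnicodeToAnsi(in, out);
    printf("%s\n", out);
    return 0;
}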

Small differences in SHA1 hashes

A project I am working on uses Apache Shiro as a security framework. Passwords are SHA1 hashed (no salt, no iterations). Login is SSL secured. However, the remaining part of the application is not SSL secured. In this context (no SSL) there should be a form where a user can change the password.
Since it wouldn't be a good idea to transmit it plainly it should be hashed on the client and then transmitted to the server. As the client is GWT (2.3) based, I am trying this library http://code.google.com/p/gwt-crypto, which uses code from bouncycastle.
However, in many cases (not all) the hashes generated by both frameworks differ in 1-4(?) characters.
For instance "happa3" is hashed to
"fe7f3cffd8a5f0512a5f1120f1369f48cd6f47c2"
by both implementations, whereas just "happa" is hashed to
"fb3c3a741b4e07a87d9cb68f3db020d6fbfed00a"
by the Shiro implementation and to
"fb3c3a741b4e07a87d9cb63f3db020d6fbfed00a"
by the gwt-crypto implementation (23rd character differs).
I wonder whether there is a "correct"/standard SHA1 hashing and whether there is a bug in one of the libraries or maybe my usage of them is flawed.
One of my first thoughts was related to different encodings or strange conversions due to different transport mechanisms (RPC vs. Post). To my knowledge though (and what puzzles me most), SHA1 hashes should differ completely with a high probability if there is just a difference of a single bit. So different encodings shouldn't be the issue here.
I am using this code on the client (GWT) for hashing:
String hashed = toHex(createSHA1Hash("password"));
...
private String createSHA1Hash(String passwordString){
    SHA1Digest sha1 = new SHA1Digest();
    byte[] bytes;
    byte[] result = new byte[sha1.getDigestSize()];
    try {
        bytes = passwordString.getBytes();
        sha1.update(bytes, 0, bytes.length);
        int val = sha1.doFinal(result, 0);
    } catch (UnsupportedEncodingException e) {}
    return new String(result);
}

public String toHex(String arg) {
    return new BigInteger(1, arg.getBytes()).toString(16);
}
And this on the server (Shiro):
String hashed = new Sha1Hash("password").toHex()
which afaics does something very similar behind the scenes (had a quick view on the source code).
Did I miss something obvious here?
EDIT: Seems like the GWT code does not run natively for some reason (i.e. just in development mode) and silently fails (it does compile, though). Have to find out why...
Edit(2): "int val = sha1.doFinal(result, 0);" is the line that makes trouble, i.e. if present, the whole code does not run natively (JS) but only in dev-mode (with wrong results)
You could test this version:
public class SHA1 {
public static native String calcSHA1(String s) /*-{
//
// A JavaScript implementation of the Secure Hash Algorithm, SHA-1, as defined
// in FIPS 180-1
// Version 2.2 Copyright Paul Johnston 2000 - 2009.
// Other contributors: Greg Holt, Andrew Kepert, Ydnar, Lostinet
// Distributed under the BSD License
// See http://pajhome.org.uk/crypt/md5 for details.
//
//
// Configurable variables. You may need to tweak these to be compatible with
// the server-side, but the defaults work in most cases.
//
var hexcase = 0; // hex output format. 0 - lowercase; 1 - uppercase
var b64pad = ""; // base-64 pad character. "=" for strict RFC compliance
//
// These are the functions you'll usually want to call
// They take string arguments and return either hex or base-64 encoded strings
//
function b64_sha1(s) { return rstr2b64(rstr_sha1(str2rstr_utf8(s))); }
function any_sha1(s, e) { return rstr2any(rstr_sha1(str2rstr_utf8(s)), e); }
function hex_hmac_sha1(k, d)
{ return rstr2hex(rstr_hmac_sha1(str2rstr_utf8(k), str2rstr_utf8(d))); }
function b64_hmac_sha1(k, d)
{ return rstr2b64(rstr_hmac_sha1(str2rstr_utf8(k), str2rstr_utf8(d))); }
function any_hmac_sha1(k, d, e)
{ return rstr2any(rstr_hmac_sha1(str2rstr_utf8(k), str2rstr_utf8(d)), e); }
//
// Perform a simple self-test to see if the VM is working
//
function sha1_vm_test()
{
return hex_sha1("abc").toLowerCase() == "a9993e364706816aba3e25717850c26c9cd0d89d";
}
//
// Calculate the SHA1 of a raw string
//
function rstr_sha1(s)
{
return binb2rstr(binb_sha1(rstr2binb(s), s.length * 8));
}
//
// Calculate the HMAC-SHA1 of a key and some data (raw strings)
//
function rstr_hmac_sha1(key, data)
{
var bkey = rstr2binb(key);
if(bkey.length > 16) bkey = binb_sha1(bkey, key.length * 8);
var ipad = Array(16), opad = Array(16);
for(var i = 0; i < 16; i++)
{
ipad[i] = bkey[i] ^ 0x36363636;
opad[i] = bkey[i] ^ 0x5C5C5C5C;
}
var hash = binb_sha1(ipad.concat(rstr2binb(data)), 512 + data.length * 8);
return binb2rstr(binb_sha1(opad.concat(hash), 512 + 160));
}
//
// Convert a raw string to a hex string
//
function rstr2hex(input)
{
try { hexcase } catch(e) { hexcase=0; }
var hex_tab = hexcase ? "0123456789ABCDEF" : "0123456789abcdef";
var output = "";
var x;
for(var i = 0; i < input.length; i++)
{
x = input.charCodeAt(i);
output += hex_tab.charAt((x >>> 4) & 0x0F)
+ hex_tab.charAt( x & 0x0F);
}
return output;
}
//
// Convert a raw string to a base-64 string
//
function rstr2b64(input)
{
try { b64pad } catch(e) { b64pad=''; }
var tab = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
var output = "";
var len = input.length;
for(var i = 0; i < len; i += 3)
{
var triplet = (input.charCodeAt(i) << 16)
| (i + 1 < len ? input.charCodeAt(i+1) << 8 : 0)
| (i + 2 < len ? input.charCodeAt(i+2) : 0);
for(var j = 0; j < 4; j++)
{
if(i * 8 + j * 6 > input.length * 8) output += b64pad;
else output += tab.charAt((triplet >>> 6*(3-j)) & 0x3F);
}
}
return output;
}
//
// Convert a raw string to an arbitrary string encoding
//
function rstr2any(input, encoding)
{
var divisor = encoding.length;
var remainders = Array();
var i, q, x, quotient;
// Convert to an array of 16-bit big-endian values, forming the dividend
var dividend = Array(Math.ceil(input.length / 2));
for(i = 0; i < dividend.length; i++)
{
dividend[i] = (input.charCodeAt(i * 2) << 8) | input.charCodeAt(i * 2 + 1);
}
//
// Repeatedly perform a long division. The binary array forms the dividend,
// the length of the encoding is the divisor. Once computed, the quotient
// forms the dividend for the next step. We stop when the dividend is zero.
// All remainders are stored for later use.
//
while(dividend.length > 0)
{
quotient = Array();
x = 0;
for(i = 0; i < dividend.length; i++)
{
x = (x << 16) + dividend[i];
q = Math.floor(x / divisor);
x -= q * divisor;
if(quotient.length > 0 || q > 0)
quotient[quotient.length] = q;
}
remainders[remainders.length] = x;
dividend = quotient;
}
// Convert the remainders to the output string
var output = "";
for(i = remainders.length - 1; i >= 0; i--)
output += encoding.charAt(remainders[i]);
// Append leading zero equivalents
var full_length = Math.ceil(input.length * 8 /
(Math.log(encoding.length) / Math.log(2)))
for(i = output.length; i < full_length; i++)
output = encoding[0] + output;
return output;
}
//
// Encode a string as utf-8.
// For efficiency, this assumes the input is valid utf-16.
//
function str2rstr_utf8(input)
{
var output = "";
var i = -1;
var x, y;
while(++i < input.length)
{
// Decode utf-16 surrogate pairs
x = input.charCodeAt(i);
y = i + 1 < input.length ? input.charCodeAt(i + 1) : 0;
if(0xD800 <= x && x <= 0xDBFF && 0xDC00 <= y && y <= 0xDFFF)
{
x = 0x10000 + ((x & 0x03FF) << 10) + (y & 0x03FF);
i++;
}
// Encode output as utf-8
if(x <= 0x7F)
output += String.fromCharCode(x);
else if(x <= 0x7FF)
output += String.fromCharCode(0xC0 | ((x >>> 6 ) & 0x1F),
0x80 | ( x & 0x3F));
else if(x <= 0xFFFF)
output += String.fromCharCode(0xE0 | ((x >>> 12) & 0x0F),
0x80 | ((x >>> 6 ) & 0x3F),
0x80 | ( x & 0x3F));
else if(x <= 0x1FFFFF)
output += String.fromCharCode(0xF0 | ((x >>> 18) & 0x07),
0x80 | ((x >>> 12) & 0x3F),
0x80 | ((x >>> 6 ) & 0x3F),
0x80 | ( x & 0x3F));
}
return output;
}
//
// Encode a string as utf-16
//
function str2rstr_utf16le(input)
{
var output = "";
for(var i = 0; i < input.length; i++)
output += String.fromCharCode( input.charCodeAt(i) & 0xFF,
(input.charCodeAt(i) >>> 8) & 0xFF);
return output;
}
function str2rstr_utf16be(input)
{
var output = "";
for(var i = 0; i < input.length; i++)
output += String.fromCharCode((input.charCodeAt(i) >>> 8) & 0xFF,
input.charCodeAt(i) & 0xFF);
return output;
}
//
// Convert a raw string to an array of big-endian words
// Characters >255 have their high-byte silently ignored.
//
function rstr2binb(input)
{
var output = Array(input.length >> 2);
for(var i = 0; i < output.length; i++)
output[i] = 0;
for(var i = 0; i < input.length * 8; i += 8)
output[i>>5] |= (input.charCodeAt(i / 8) & 0xFF) << (24 - i % 32);
return output;
}
//
// Convert an array of big-endian words to a string
//
function binb2rstr(input)
{
var output = "";
for(var i = 0; i < input.length * 32; i += 8)
output += String.fromCharCode((input[i>>5] >>> (24 - i % 32)) & 0xFF);
return output;
}
//
// Calculate the SHA-1 of an array of big-endian words, and a bit length
//
function binb_sha1(x, len)
{
// append padding
x[len >> 5] |= 0x80 << (24 - len % 32);
x[((len + 64 >> 9) << 4) + 15] = len;
var w = Array(80);
var a = 1732584193;
var b = -271733879;
var c = -1732584194;
var d = 271733878;
var e = -1009589776;
for(var i = 0; i < x.length; i += 16)
{
var olda = a;
var oldb = b;
var oldc = c;
var oldd = d;
var olde = e;
for(var j = 0; j < 80; j++)
{
if(j < 16) w[j] = x[i + j];
else w[j] = bit_rol(w[j-3] ^ w[j-8] ^ w[j-14] ^ w[j-16], 1);
var t = safe_add(safe_add(bit_rol(a, 5), sha1_ft(j, b, c, d)),
safe_add(safe_add(e, w[j]), sha1_kt(j)));
e = d;
d = c;
c = bit_rol(b, 30);
b = a;
a = t;
}
a = safe_add(a, olda);
b = safe_add(b, oldb);
c = safe_add(c, oldc);
d = safe_add(d, oldd);
e = safe_add(e, olde);
}
return Array(a, b, c, d, e);
}
//
// Perform the appropriate triplet combination function for the current
// iteration
//
function sha1_ft(t, b, c, d)
{
if(t < 20) return (b & c) | ((~b) & d);
if(t < 40) return b ^ c ^ d;
if(t < 60) return (b & c) | (b & d) | (c & d);
return b ^ c ^ d;
}
//
// Determine the appropriate additive constant for the current iteration
//
function sha1_kt(t)
{
return (t < 20) ? 1518500249 : (t < 40) ? 1859775393 :
(t < 60) ? -1894007588 : -899497514;
}
//
// Add integers, wrapping at 2^32. This uses 16-bit operations internally
// to work around bugs in some JS interpreters.
//
function safe_add(x, y)
{
var lsw = (x & 0xFFFF) + (y & 0xFFFF);
var msw = (x >> 16) + (y >> 16) + (lsw >> 16);
return (msw << 16) | (lsw & 0xFFFF);
}
//
// Bitwise rotate a 32-bit number to the left.
//
function bit_rol(num, cnt)
{
return (num << cnt) | (num >>> (32 - cnt));
}
return rstr2hex(rstr_sha1(str2rstr_utf8(s)));
}-*/;
}
I'm using it for my client-side SHA generation and it works well.

Converting a bigint to a byte array (raw data)

I have used the following code to convert a bigint to a byte array (raw data), but I'm getting the wrong result.
What is the mistake here?
I'm trying this on a Mac (for an iPhone app).
COMP_BYTE_SIZE is 4.
Is there any big-endian/little-endian issue? Please help.
void bi_export(BI_CTX *ctx, bigint *x, uint8_t *data, int size)
{
    int i, j, k = size-1;

    check(x);
    memset(data, 0, size); /* ensure all leading 0's are cleared */

    for (i = 0; i < x->size; i++)
    {
        for (j = 0; j < COMP_BYTE_SIZE; j++)
        {
            comp mask = 0xff << (j*8);
            int num = (x->comps[i] & mask) >> (j*8);

            data[k--] = num;
            if (k < 0)
            {
                break;
            }
        }
    }
}
Thanks.
Is the argument size at least x->size*4, i.e. is the target array big enough? Also use
comp mask = (comp)0xff << (j*8);
and num should be cast to uint8_t before the copy:
data[k--] = (uint8_t)num;
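Putting those two suggestions together, the inner loop of bi_export might look like this (a sketch, assuming comp is an unsigned 32-bit type):
for (j = 0; j < COMP_BYTE_SIZE; j++)
{
    comp mask = (comp)0xff << (j*8);
    int num = (x->comps[i] & mask) >> (j*8);

    data[k--] = (uint8_t)num;
    if (k < 0)
    {
        break;
    }
}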

Formatting a (large) number "12345" to "12,345"

Say I have a large number (integer or float) like 12345 and I want it to look like 12,345.
How would I accomplish that?
I'm trying to do this for an iPhone app, so something in Objective-C or C would be nice.
Here is the answer.
NSNumber* number = [NSNumber numberWithDouble:10000000];
NSNumberFormatter *numberFormatter = [[NSNumberFormatter alloc] init];
[numberFormatter setNumberStyle:NSNumberFormatterDecimalStyle];
[numberFormatter setGroupingSeparator:@","];
NSString* commaString = [numberFormatter stringForObjectValue:number];
[numberFormatter release];
NSLog(@"%@ -> %@", number, commaString);
Try using an NSNumberFormatter.
This should allow you to handle this correctly on an iPhone. Make sure you use the 10.4+ style, though. From that page:
"iPhone OS: The v10.0 compatibility mode is not available on iPhone OS—only the 10.4 mode is available."
At least on Mac OS X, you can just use the ' (apostrophe) flag with printf(3).
$ man 3 printf
'     Decimal conversions (d, u, or i) or the integral portion of a
      floating point conversion (f or F) should be grouped and
      separated by thousands using the non-monetary separator
      returned by localeconv(3).
as in printf("%'6d",1000000);
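Note that the ' flag is a POSIX/BSD extension and it groups according to the current locale, so with the default "C" locale nothing is inserted; a minimal test program:
#include <locale.h>
#include <stdio.h>

int main(void)
{
    setlocale(LC_NUMERIC, "");   /* use the user's locale for the grouping separator */
    printf("%'d\n", 12345);      /* prints 12,345 under an en_US locale */
    return 0;
}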
Cleaner C code
// write integer value in ASCII into buf of size bufSize, inserting commas at thousands
// character string in buf is terminated by 0.
// return length of character string or bufSize+1 if buf is too small.
size_t int2str( char *buf, size_t bufSize, int val )
{
    char *p;
    size_t len, digits, neg;

    // handle easy case of value 0 first
    if( val == 0 )
    {
        if( bufSize < 2 )
            return bufSize + 1;
        buf[0] = '0';
        buf[1] = '\0';
        return 1;
    }

    // extract sign of value and set val to absolute value
    if( val < 0 )
    {
        val = -val;
        neg = 1;
    }
    else
        neg = 0;

    // encode backwards, starting with the terminating 0
    p = buf + bufSize;
    *--p = '\0';
    len = 1;
    digits = 0;

    while( val > 0 )
    {
        // insert a comma in front of every group of three digits
        if( digits > 0 && (digits % 3) == 0 )
        {
            if( len == bufSize )
                return bufSize + 1;
            *--p = ',';
            ++len;
        }
        // if the buffer is full the value does not fit
        if( len == bufSize )
            return bufSize + 1;
        // put front the next digit
        *--p = '0' + val % 10;
        val /= 10;
        ++digits;
        ++len;
    }

    // add negative sign if required
    if( neg == 1 )
    {
        if( len == bufSize )
            return bufSize + 1;
        *--p = '-';
        ++len;
    }

    // move string to front of buffer if required
    if( p != buf )
        while( (*buf++ = *p++) )
            ;

    // return encoded string length not including the terminating 0
    return len - 1;
}
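A hypothetical call of the function above:
#include <stdio.h>

int main( void )
{
    char buf[ 16 ];

    if( int2str( buf, sizeof buf, 12345 ) <= sizeof buf )
        printf( "%s\n", buf );   /* prints 12,345 */
    return 0;
}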
I did this for an iPhone game recently. I was using the built-in LCD font, which is a monospaced font. I formatted the numbers, ignoring the commas, then stuck the commas in afterward. (The way calculators do it, where the comma is not considered a character.)
Check out the screenshots at RetroJuJu. Sorry--they aren't full-sized screenshots so you'll have to squint!
Hope that helps you (it's in C):
#include <stdlib.h>

char* intToFormat(int a)
{
    int nb = 0;     // characters written so far
    int digits = 0; // digits written so far
    int i, j;
    char tmp;
    char* res;

    res = (char*)malloc(16*sizeof(char));
    // 16 is enough for a 32-bit int with commas and the terminating zero.
    // Make it bigger if you need to use bigger numbers.
    if( a == 0 )
        res[nb++] = '0';
    while( a > 0 )
    {
        // a comma goes in front of every group of three digits
        if( digits > 0 && digits%3 == 0 )
            res[nb++] = ',';
        // Get the code for the '0' char and add the digit to it (ex: '0' + 5 = '5')
        res[nb++] = '0' + a%10;
        digits++;
        a /= 10;
    }
    res[nb] = '\0';
    // the digits were produced least-significant first, so reverse in place
    for( i = 0, j = nb-1; i < j; i++, j-- )
    {
        tmp = res[i];
        res[i] = res[j];
        res[j] = tmp;
    }
    return res;
}
There might be a few errors I didn't see (I'm blind when it comes to this...)
It's like an enhanced itoa, so maybe it's not the best solution.
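Hypothetical usage (the caller owns the returned buffer):
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    char* s = intToFormat(1234567);

    printf("%s\n", s);   // prints 1,234,567
    free(s);
    return 0;
}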
Use recursion, Luke:
#include <stdio.h>
#include <stdlib.h>

static int sprint64u( char* buffer, unsigned __int64 x) {
    unsigned __int64 quot = x / 1000;
    int chars_written;

    if ( quot != 0) {
        chars_written = sprint64u( buffer, quot);
        chars_written += sprintf( buffer + chars_written, ".%03u", ( unsigned int)( x % 1000));
    }
    else {
        chars_written = sprintf( buffer, "%u", ( unsigned int)( x % 1000));
    }
    return chars_written;
}

int main( void) {
    char buffer[ 32];

    sprint64u( buffer, 0x100000000ULL);
    puts( buffer);
    return EXIT_SUCCESS;
}