Related
If I encode a string like this:
var escapedString = originalString.stringByAddingPercentEscapesUsingEncoding(NSUTF8StringEncoding)
it doesn't escape the slashes /.
I've searched and found this Objective C code:
NSString *encodedString = (NSString *)CFURLCreateStringByAddingPercentEscapes(
NULL,
(CFStringRef)unencodedString,
NULL,
(CFStringRef)#"!*'();:#&=+$,/?%#[]",
kCFStringEncodingUTF8 );
Is there an easier way to encode an URL and if not, how do I write this in Swift?
Swift 3
In Swift 3 there is addingPercentEncoding
let originalString = "test/test"
let escapedString = originalString.addingPercentEncoding(withAllowedCharacters: .urlHostAllowed)
print(escapedString!)
Output:
test%2Ftest
Swift 1
In iOS 7 and above there is stringByAddingPercentEncodingWithAllowedCharacters
var originalString = "test/test"
var escapedString = originalString.stringByAddingPercentEncodingWithAllowedCharacters(.URLHostAllowedCharacterSet())
println("escapedString: \(escapedString)")
Output:
test%2Ftest
The following are useful (inverted) character sets:
URLFragmentAllowedCharacterSet "#%<>[\]^`{|}
URLHostAllowedCharacterSet "#%/<>?#\^`{|}
URLPasswordAllowedCharacterSet "#%/:<>?#[\]^`{|}
URLPathAllowedCharacterSet "#%;<>?[\]^`{|}
URLQueryAllowedCharacterSet "#%<>[\]^`{|}
URLUserAllowedCharacterSet "#%/:<>?#[\]^`
If you want a different set of characters to be escaped create a set:
Example with added "=" character:
var originalString = "test/test=42"
var customAllowedSet = NSCharacterSet(charactersInString:"=\"#%/<>?#\\^`{|}").invertedSet
var escapedString = originalString.stringByAddingPercentEncodingWithAllowedCharacters(customAllowedSet)
println("escapedString: \(escapedString)")
Output:
test%2Ftest%3D42
Example to verify ascii characters not in the set:
func printCharactersInSet(set: NSCharacterSet) {
var characters = ""
let iSet = set.invertedSet
for i: UInt32 in 32..<127 {
let c = Character(UnicodeScalar(i))
if iSet.longCharacterIsMember(i) {
characters = characters + String(c)
}
}
print("characters not in set: \'\(characters)\'")
}
You can use URLComponents to avoid having to manually percent encode your query string:
let scheme = "https"
let host = "www.google.com"
let path = "/search"
let queryItem = URLQueryItem(name: "q", value: "Formula One")
var urlComponents = URLComponents()
urlComponents.scheme = scheme
urlComponents.host = host
urlComponents.path = path
urlComponents.queryItems = [queryItem]
if let url = urlComponents.url {
print(url) // "https://www.google.com/search?q=Formula%20One"
}
extension URLComponents {
init(scheme: String = "https",
host: String = "www.google.com",
path: String = "/search",
queryItems: [URLQueryItem]) {
self.init()
self.scheme = scheme
self.host = host
self.path = path
self.queryItems = queryItems
}
}
let query = "Formula One"
if let url = URLComponents(queryItems: [URLQueryItem(name: "q", value: query)]).url {
print(url) // https://www.google.com/search?q=Formula%20One
}
Swift 4 & 5
To encode a parameter in URL I find using .alphanumerics character set the easiest option:
let urlEncoded = value.addingPercentEncoding(withAllowedCharacters: .alphanumerics)
let url = "http://www.example.com/?name=\(urlEncoded!)"
Using any of the standard Character Sets for URL Encoding (like .urlQueryAllowed or .urlHostAllowed) won't work, because they do not exclude = or & characters.
Note that by using .alphanumerics it will encode some characters that do not need to be encoded (like -, ., _ or ~ -– see 2.3. Unreserved characters in RFC 3986). I find using .alphanumerics simpler than constructing a custom character set and do not mind some additional characters to be encoded. If that bothers you, construct a custom character set as is described in How to percent encode a URL String, like for example:
// Store allowed character set for reuse (computed lazily).
private let urlAllowed: CharacterSet =
.alphanumerics.union(.init(charactersIn: "-._~")) // as per RFC 3986
extension String {
var urlEncoded: String? {
return addingPercentEncoding(withAllowedCharacters: urlAllowed)
}
}
let url = "http://www.example.com/?name=\(value.urlEncoded!)"
Warning: The urlEncoded parameter is force unwrapped. For invalid unicode string it might crash. See Why is the return value of String.addingPercentEncoding() optional?. Instead of force unwrapping urlEncoded! you can use urlEncoded ?? "" or if let urlEncoded = urlEncoded { ... }.
Swift 5:
extension String {
var urlEncoded: String? {
let allowedCharacterSet = CharacterSet.alphanumerics.union(CharacterSet(charactersIn: "~-_."))
return self.addingPercentEncoding(withAllowedCharacters: allowedCharacterSet)
}
}
print("\u{48}ello\u{9}world\u{7}\u{0}".urlEncoded!) // prints Hello%09world%07%00
print("The string ü#foo-bar".urlEncoded!) // prints The%20string%20%C3%BC%40foo-bar
Swift 3:
let originalString = "http://www.ihtc.cc?name=htc&title=iOS开发工程师"
1. encodingQuery:
let escapedString = originalString.addingPercentEncoding(withAllowedCharacters:NSCharacterSet.urlQueryAllowed)
result:
"http://www.ihtc.cc?name=htc&title=iOS%E5%BC%80%E5%8F%91%E5%B7%A5%E7%A8%8B%E5%B8%88"
2. encodingURL:
let escapedString = originalString.addingPercentEncoding(withAllowedCharacters: .urlHostAllowed)
result:
"http:%2F%2Fwww.ihtc.cc%3Fname=htc&title=iOS%E5%BC%80%E5%8F%91%E5%B7%A5%E7%A8%8B%E5%B8%88"
Swift 4 & 5 (Thanks #sumizome for suggestion. Thanks #FD_ and #derickito for testing)
var allowedQueryParamAndKey = NSCharacterSet.urlQueryAllowed
allowedQueryParamAndKey.remove(charactersIn: ";/?:#&=+$, ")
paramOrKey.addingPercentEncoding(withAllowedCharacters: allowedQueryParamAndKey)
Swift 3
let allowedQueryParamAndKey = NSCharacterSet.urlQueryAllowed.remove(charactersIn: ";/?:#&=+$, ")
paramOrKey.addingPercentEncoding(withAllowedCharacters: allowedQueryParamAndKey)
Swift 2.2 (Borrowing from Zaph's and correcting for url query key and parameter values)
var allowedQueryParamAndKey = NSCharacterSet(charactersInString: ";/?:#&=+$, ").invertedSet
paramOrKey.stringByAddingPercentEncodingWithAllowedCharacters(allowedQueryParamAndKey)
Example:
let paramOrKey = "https://some.website.com/path/to/page.srf?a=1&b=2#top"
paramOrKey.addingPercentEncoding(withAllowedCharacters: allowedQueryParamAndKey)
// produces:
"https%3A%2F%2Fsome.website.com%2Fpath%2Fto%2Fpage.srf%3Fa%3D1%26b%3D2%23top"
This is a shorter version of Bryan Chen's answer. I'd guess that urlQueryAllowed is allowing the control characters through which is fine unless they form part of the key or value in your query string at which point they need to be escaped.
Swift 4:
It depends by the encoding rules followed by your server.
Apple offer this class method, but it don't report wich kind of RCF protocol it follows.
var escapedString = originalString.addingPercentEncoding(withAllowedCharacters: .urlHostAllowed)!
Following this useful tool you should guarantee the encoding of these chars for your parameters:
$ (Dollar Sign) becomes %24
& (Ampersand) becomes %26
+ (Plus) becomes %2B
, (Comma) becomes %2C
: (Colon) becomes %3A
; (Semi-Colon) becomes %3B
= (Equals) becomes %3D
? (Question Mark) becomes %3F
# (Commercial A / At) becomes %40
In other words, speaking about URL encoding, you should following the RFC 1738 protocol.
And Swift don't cover the encoding of the + char for example, but it works well with these three # : ? chars.
So, to correctly encoding each your parameter , the .urlHostAllowed option is not enough, you should add also the special chars as for example:
encodedParameter = parameter.replacingOccurrences(of: "+", with: "%2B")
Hope this helps someone who become crazy to search these informations.
Everything is same
var str = CFURLCreateStringByAddingPercentEscapes(
nil,
"test/test",
nil,
"!*'();:#&=+$,/?%#[]",
CFStringBuiltInEncodings.UTF8.rawValue
)
// test%2Ftest
Swift 4.2
A quick one line solution. Replace originalString with the String you want to encode.
var encodedString = originalString.addingPercentEncoding(withAllowedCharacters: CharacterSet(charactersIn: "!*'();:#&=+$,/?%#[]{} ").inverted)
Online Playground Demo
This is working for me in Swift 5. The usage case is taking a URL from the clipboard or similar which may already have escaped characters but which also contains Unicode characters which could cause URLComponents or URL(string:) to fail.
First, create a character set that includes all URL-legal characters:
extension CharacterSet {
/// Characters valid in at least one part of a URL.
///
/// These characters are not allowed in ALL parts of a URL; each part has different requirements. This set is useful for checking for Unicode characters that need to be percent encoded before performing a validity check on individual URL components.
static var urlAllowedCharacters: CharacterSet {
// Start by including hash, which isn't in any set
var characters = CharacterSet(charactersIn: "#")
// All URL-legal characters
characters.formUnion(.urlUserAllowed)
characters.formUnion(.urlPasswordAllowed)
characters.formUnion(.urlHostAllowed)
characters.formUnion(.urlPathAllowed)
characters.formUnion(.urlQueryAllowed)
characters.formUnion(.urlFragmentAllowed)
return characters
}
}
Next, extend String with a method to encode URLs:
extension String {
/// Converts a string to a percent-encoded URL, including Unicode characters.
///
/// - Returns: An encoded URL if all steps succeed, otherwise nil.
func encodedUrl() -> URL? {
// Remove preexisting encoding,
guard let decodedString = self.removingPercentEncoding,
// encode any Unicode characters so URLComponents doesn't choke,
let unicodeEncodedString = decodedString.addingPercentEncoding(withAllowedCharacters: .urlAllowedCharacters),
// break into components to use proper encoding for each part,
let components = URLComponents(string: unicodeEncodedString),
// and reencode, to revert decoding while encoding missed characters.
let percentEncodedUrl = components.url else {
// Encoding failed
return nil
}
return percentEncodedUrl
}
}
Which can be tested like:
let urlText = "https://www.example.com/폴더/search?q=123&foo=bar&multi=eggs+and+ham&hangul=한글&spaced=lovely%20spam&illegal=<>#top"
let url = encodedUrl(from: urlText)
Value of url at the end: https://www.example.com/%ED%8F%B4%EB%8D%94/search?q=123&foo=bar&multi=eggs+and+ham&hangul=%ED%95%9C%EA%B8%80&spaced=lovely%20spam&illegal=%3C%3E#top
Note that both %20 and + spacing are preserved, Unicode characters are encoded, the %20 in the original urlText is not double encoded, and the anchor (fragment, or #) remains.
Edit: Now checking for validity of each component.
For Swift 5 to endcode string
func escape(string: String) -> String {
let allowedCharacters = string.addingPercentEncoding(withAllowedCharacters: CharacterSet(charactersIn: ":=\"#%/<>?#\\^`{|}").inverted) ?? ""
return allowedCharacters
}
How to use ?
let strEncoded = self.escape(string: "http://www.edamam.com/ontologies/edamam.owl#recipe_e2a1b9bf2d996cbd9875b80612ed9aa4")
print("escapedString: \(strEncoded)")
Had need of this myself, so I wrote a String extension that both allows for URLEncoding strings, as well as the more common end goal, converting a parameter dictionary into "GET" style URL Parameters:
extension String {
func URLEncodedString() -> String? {
var escapedString = self.addingPercentEncoding(withAllowedCharacters: .urlHostAllowed)
return escapedString
}
static func queryStringFromParameters(parameters: Dictionary<String,String>) -> String? {
if (parameters.count == 0)
{
return nil
}
var queryString : String? = nil
for (key, value) in parameters {
if let encodedKey = key.URLEncodedString() {
if let encodedValue = value.URLEncodedString() {
if queryString == nil
{
queryString = "?"
}
else
{
queryString! += "&"
}
queryString! += encodedKey + "=" + encodedValue
}
}
}
return queryString
}
}
Enjoy!
This one is working for me.
func stringByAddingPercentEncodingForFormData(plusForSpace: Bool=false) -> String? {
let unreserved = "*-._"
let allowed = NSMutableCharacterSet.alphanumericCharacterSet()
allowed.addCharactersInString(unreserved)
if plusForSpace {
allowed.addCharactersInString(" ")
}
var encoded = stringByAddingPercentEncodingWithAllowedCharacters(allowed)
if plusForSpace {
encoded = encoded?.stringByReplacingOccurrencesOfString(" ", withString: "+")
}
return encoded
}
I found above function from this link: http://useyourloaf.com/blog/how-to-percent-encode-a-url-string/.
let Url = URL(string: urlString.addingPercentEncoding(withAllowedCharacters: .urlQueryAllowed) ?? "")
None of these answers worked for me. Our app was crashing when a url contained non-English characters.
let unreserved = "-._~/?%$!:"
let allowed = NSMutableCharacterSet.alphanumeric()
allowed.addCharacters(in: unreserved)
let escapedString = urlString.addingPercentEncoding(withAllowedCharacters: allowed as CharacterSet)
Depending on the parameters of what you are trying to do, you may want to just create your own character set. The above allows for english characters, and -._~/?%$!:
What helped me was that I created a separate NSCharacterSet and used it on an UTF-8 encoded string i.e. textToEncode to generate the required result:
var queryCharSet = NSCharacterSet.urlQueryAllowed
queryCharSet.remove(charactersIn: "+&?,:;#+=$*()")
let utfedCharacterSet = String(utf8String: textToEncode.cString(using: .utf8)!)!
let encodedStr = utfedCharacterSet.addingPercentEncoding(withAllowedCharacters: queryCharSet)!
let paramUrl = "https://api.abc.eu/api/search?device=true&query=\(escapedStr)"
SWIFT 4.2
Sometimes this happened just because there is space in slug OR absence of URL encoding for parameters passing through API URL.
let myString = self.slugValue
let csCopy = CharacterSet(bitmapRepresentation: CharacterSet.urlPathAllowed.bitmapRepresentation)
let escapedString = myString!.addingPercentEncoding(withAllowedCharacters: csCopy)!
//always "info:hello%20world"
print(escapedString)
NOTE : Don't forget to explore about bitmapRepresentation.
version:Swift 5
// space convert to +
let mstring = string.replacingOccurrences(of: " ", with: "+")
// remove special character
var allowedQueryParamAndKey = NSCharacterSet.urlQueryAllowed
allowedQueryParamAndKey.remove(charactersIn: "!*'\"();:#&=+$,/?%#[]%")
return mstring.addingPercentEncoding(withAllowedCharacters: allowedQueryParamAndKey) ?? mstring
Swift 5
You can try .afURLQueryAllowed option if you want to encode string like below
let testString = "6hAD9/RjY+SnGm&B"
let escodedString = testString.addingPercentEncoding(withAllowedCharacters: .afURLQueryAllowed)
print(escodedString!)
//encoded string will be like en6hAD9%2FRjY%2BSnGm%26B
I am having an issue understanding the difference between Range-String.Index- and String.Index
For example:
func returnHtmlContent() -> String {
let filePath = URL(string:"xxx.htm")
filePath?.startAccessingSecurityScopedResource();
let htmlFile = Bundle.main.path(forResource: "getData", ofType: "htm");
let html = try! String(contentsOfFile: htmlFile!, encoding: String.Encoding.utf8);
filePath?.stopAccessingSecurityScopedResource();
return html;
};
func refactorHtml(content: String) -> String {
let StartingString = "<div id=\"1\">";
let EndingString = "</div></form>";
func selectString() -> String {
var htmlContent = returnHtmlContent();
let charLocationStart = htmlContent.range(of: StartingString);
let charLocationEnd = htmlContent.range(of: EndingString);
htmlContent.remove(at: charLocationStart);
return htmlContent;
};
let formattedBody = selectString();
return formattedBody;
};
refactorHtml(content: returnHtmlContent());
The idea in pseudocode
Generated HTMLBody
Pass To Function that formats
Remove all characters before StartingString
Remove all Characters After EndingString
Send NewString to Variable
Now - when I try to find the index position I cant seem to get the right value type, this is the error I am getting
Cannot convert value of type 'Range<String.Index>?' to expected argument type 'String.Index'
This is running in a playground
String indices aren't integers. They're opaque objects (of type String.Index) which can be used to subscript into a String to obtain a character.
Ranges aren't limited to only Range<Int>. If you look at the declaration of Range, you can see it's generic over any Bound, so long as the Bound is Comparable (which String.Index is).
So a Range<String.Index> is just that. It's a range of string indices, and just like any other range, it has a lowerBound, and an upperBound.
I want to check whether my filename with just prefix is exist or not in Swift.
E.g
My file name is like Companies_12344
So after _ values are dynamic but "Companies_" is static.
How can i do that?
My code below For split
func splitFilename(str: String) -> (name: String, ext: String)? {
if let rDotIdx = find(reverse(str), "_")
{
let dotIdx = advance(str.endIndex, -rDotIdx)
let fname = str[str.startIndex..<advance(dotIdx, -1)]
println("splitFilename >> Split File Name >>\(fname)")
}
return nil
}
It's not very clear what you want to do, because your code snippet already does check if the string has the prefix.
There's a simpler way, though:
let fileName = "Companies_12344"
if fileName.hasPrefix("Companies") {
println("Yes, this one has 'Companies' as a prefix")
}
Swift's hasPrefix method checks if the string begins with the specified string.
Also, you could split the string easily with this:
let compos = fileName.componentsSeparatedByString("_") // ["Companies", "12344"]
Then you could check if there's a code and grab it with:
if let fileCode = compos.last {
println("There was a code after the prefix: \(fileCode)")
}
I need a simple thing: encode a string in base64. I found an example:
extern crate serialize;
use serialize::base64::{mod, ToBase64};
use serialize::hex::FromHex;
fn main() {
let input = "49276d206b696c6c696e6720796f757220627261696e206c696b65206120706f69736f6e6f7573206d757368726f6f6d";
let result = input.from_hex().unwrap().as_slice().to_base64(base64::STANDARD);
println!("{}", result);
}
Which seams to work but I don't understand why input contains only characters in HEX. Moreover, this Python code produces a different result:
base64.b64encode(input) # =>
'NDkyNzZkMjA2YjY5NmM2YzY5NmU2NzIwNzk2Zjc1NzIyMDYyNzI2MTY5NmUyMDZjNjk2YjY1MjA2MTIwNzA2ZjY5NzM2ZjZlNmY3NTczMjA2ZDc1NzM2ODcyNmY2ZjZk'
So I decided to do the following:
//....
let input = "some string 123";
let result2 = input.unwrap().as_slice().to_base64(base64::STANDARD);
let result3 = input.as_slice().to_base64(base64::STANDARD);
And it didn't compile due to the errors:
error: type `&str` does not implement any method in scope named `unwrap`
test1.rs:9 let result2 = input.unwrap().as_slice().to_base64(base64::STANDARD);
^~~~~~~~
test1.rs:9:34: 9:44 error: multiple applicable methods in scope [E0034]
So how do I encode a simple string in base64?
If you don't have hex input, try this:
let result = input.as_bytes().to_base64(base64::STANDARD);
to_base64 is only defined for a slice of bytes so you have to first call as_bytes on the string:
extern crate serialize;
use serialize::base64::{mod, ToBase64};
fn main() {
let input = "49276d206b696c6c696e6720796f757220627261696e206c696b65206120706f69736f6e6f7573206d757368726f6f6d";
let result = input.as_bytes().to_base64(base64::STANDARD);
println!("{}", result);
}
Input has the type &static str:
let input = "some string 123";
There is no unwrap defined for &'static str:
let result2 = input.unwrap().as_slice().to_base64(base64::STANDARD);
You already have a slice (&str) but you need &[u8]:
let result3 = input.as_slice().to_base64(base64::STANDARD);
We know we can print each character in UTF8 code units?
Then, if we have code units of these characters, how can we create a String with them?
With Swift 5, you can choose one of the following ways in order to convert a collection of UTF-8 code units into a string.
#1. Using String's init(_:) initializer
If you have a String.UTF8View instance (i.e. a collection of UTF-8 code units) and want to convert it to a string, you can use init(_:) initializer. init(_:) has the following declaration:
init(_ utf8: String.UTF8View)
Creates a string corresponding to the given sequence of UTF-8 code units.
The Playground sample code below shows how to use init(_:):
let string = "Café 🇫🇷"
let utf8View: String.UTF8View = string.utf8
let newString = String(utf8View)
print(newString) // prints: Café 🇫🇷
#2. Using Swift's init(decoding:as:) initializer
init(decoding:as:) creates a string from the given Unicode code units collection in the specified encoding:
let string = "Café 🇫🇷"
let codeUnits: [Unicode.UTF8.CodeUnit] = Array(string.utf8)
let newString = String(decoding: codeUnits, as: UTF8.self)
print(newString) // prints: Café 🇫🇷
Note that init(decoding:as:) also works with String.UTF8View parameter:
let string = "Café 🇫🇷"
let utf8View: String.UTF8View = string.utf8
let newString = String(decoding: utf8View, as: UTF8.self)
print(newString) // prints: Café 🇫🇷
#3. Using transcode(_:from:to:stoppingOnError:into:) function
The following example transcodes the UTF-8 representation of an initial string into Unicode scalar values (UTF-32 code units) that can be used to build a new string:
let string = "Café 🇫🇷"
let bytes = Array(string.utf8)
var newString = ""
_ = transcode(bytes.makeIterator(), from: UTF8.self, to: UTF32.self, stoppingOnError: true, into: {
newString.append(String(Unicode.Scalar($0)!))
})
print(newString) // prints: Café 🇫🇷
#4. Using Array's withUnsafeBufferPointer(_:) method and String's init(cString:) initializer
init(cString:) has the following declaration:
init(cString: UnsafePointer<CChar>)
Creates a new string by copying the null-terminated UTF-8 data referenced by the given pointer.
The following example shows how to use init(cString:) with a pointer to the content of a CChar array (i.e. a well-formed UTF-8 code unit sequence) in order to create a string from it:
let bytes: [CChar] = [67, 97, 102, -61, -87, 32, -16, -97, -121, -85, -16, -97, -121, -73, 0]
let newString = bytes.withUnsafeBufferPointer({ (bufferPointer: UnsafeBufferPointer<CChar>)in
return String(cString: bufferPointer.baseAddress!)
})
print(newString) // prints: Café 🇫🇷
#5. Using Unicode.UTF8's decode(_:) method
To decode a code unit sequence, call decode(_:) repeatedly until it returns UnicodeDecodingResult.emptyInput:
let string = "Café 🇫🇷"
let codeUnits = Array(string.utf8)
var codeUnitIterator = codeUnits.makeIterator()
var utf8Decoder = Unicode.UTF8()
var newString = ""
Decode: while true {
switch utf8Decoder.decode(&codeUnitIterator) {
case .scalarValue(let value):
newString.append(Character(Unicode.Scalar(value)))
case .emptyInput:
break Decode
case .error:
print("Decoding error")
break Decode
}
}
print(newString) // prints: Café 🇫🇷
#6. Using String's init(bytes:encoding:) initializer
Foundation gives String a init(bytes:encoding:) initializer that you can use as indicated in the Playground sample code below:
import Foundation
let string = "Café 🇫🇷"
let bytes: [Unicode.UTF8.CodeUnit] = Array(string.utf8)
let newString = String(bytes: bytes, encoding: String.Encoding.utf8)
print(String(describing: newString)) // prints: Optional("Café 🇫🇷")
It's possible to convert UTF8 code points to a Swift String idiomatically using the UTF8 Swift class. Although it's much easier to convert from String to UTF8!
import Foundation
public class UTF8Encoding {
public static func encode(bytes: Array<UInt8>) -> String {
var encodedString = ""
var decoder = UTF8()
var generator = bytes.generate()
var finished: Bool = false
do {
let decodingResult = decoder.decode(&generator)
switch decodingResult {
case .Result(let char):
encodedString.append(char)
case .EmptyInput:
finished = true
/* ignore errors and unexpected values */
case .Error:
finished = true
default:
finished = true
}
} while (!finished)
return encodedString
}
public static func decode(str: String) -> Array<UInt8> {
var decodedBytes = Array<UInt8>()
for b in str.utf8 {
decodedBytes.append(b)
}
return decodedBytes
}
}
func testUTF8Encoding() {
let testString = "A UTF8 String With Special Characters: 😀🍎"
let decodedArray = UTF8Encoding.decode(testString)
let encodedString = UTF8Encoding.encode(decodedArray)
XCTAssert(encodedString == testString, "UTF8Encoding is lossless: \(encodedString) != \(testString)")
}
Of the other alternatives suggested:
Using NSString invokes the Objective-C bridge;
Using UnicodeScalar is error-prone because it converts UnicodeScalars directly to Characters, ignoring complex grapheme clusters; and
Using String.fromCString is potentially unsafe as it uses pointers.
improve on Martin R's answer
import AppKit
let utf8 : CChar[] = [65, 66, 67, 0]
let str = NSString(bytes: utf8, length: utf8.count, encoding: NSUTF8StringEncoding)
println(str) // Output: ABC
import AppKit
let utf8 : UInt8[] = [0xE2, 0x82, 0xAC, 0]
let str = NSString(bytes: utf8, length: utf8.count, encoding: NSUTF8StringEncoding)
println(str) // Output: €
What happened is Array can be automatic convert to CConstVoidPointer which can be used to create string with NSSString(bytes: CConstVoidPointer, length len: Int, encoding: Uint)
Swift 3
let s = String(bytes: arr, encoding: .utf8)
I've been looking for a comprehensive answer regarding string manipulation in Swift myself. Relying on cast to and from NSString and other unsafe pointer magic just wasn't doing it for me. Here's a safe alternative:
First, we'll want to extend UInt8. This is the primitive type behind CodeUnit.
extension UInt8 {
var character: Character {
return Character(UnicodeScalar(self))
}
}
This will allow us to do something like this:
let codeUnits: [UInt8] = [
72, 69, 76, 76, 79
]
let characters = codeUnits.map { $0.character }
let string = String(characters)
// string prints "HELLO"
Equipped with this extension, we can now being modifying strings.
let string = "ABCDEFGHIJKLMONP"
var modifiedCharacters = [Character]()
for (index, utf8unit) in string.utf8.enumerate() {
// Insert a "-" every 4 characters
if index > 0 && index % 4 == 0 {
let separator: UInt8 = 45 // "-" in ASCII
modifiedCharacters.append(separator.character)
}
modifiedCharacters.append(utf8unit.character)
}
let modifiedString = String(modifiedCharacters)
// modified string == "ABCD-EFGH-IJKL-MONP"
// Swift4
var units = [UTF8.CodeUnit]()
//
// update units
//
let str = String(decoding: units, as: UTF8.self)
I would do something like this, it may be not such elegant than working with 'pointers' but it does the job well, those are pretty much about a bunch of new += operators for String like:
#infix func += (inout lhs: String, rhs: (unit1: UInt8)) {
lhs += Character(UnicodeScalar(UInt32(rhs.unit1)))
}
#infix func += (inout lhs: String, rhs: (unit1: UInt8, unit2: UInt8)) {
lhs += Character(UnicodeScalar(UInt32(rhs.unit1) << 8 | UInt32(rhs.unit2)))
}
#infix func += (inout lhs: String, rhs: (unit1: UInt8, unit2: UInt8, unit3: UInt8, unit4: UInt8)) {
lhs += Character(UnicodeScalar(UInt32(rhs.unit1) << 24 | UInt32(rhs.unit2) << 16 | UInt32(rhs.unit3) << 8 | UInt32(rhs.unit4)))
}
NOTE: you can extend the list of the supported operators with overriding + operator as well, defining a list of the fully commutative operators for String.
and now you are able to append a String with a unicode (UTF-8, UTF-16 or UTF-32) character like e.g.:
var string: String = "signs of the Zodiac: "
string += (0x0, 0x0, 0x26, 0x4b)
string += (38)
string += (0x26, 76)
This is a possible solution (now updated for Swift 2):
let utf8 : [CChar] = [65, 66, 67, 0]
if let str = utf8.withUnsafeBufferPointer( { String.fromCString($0.baseAddress) }) {
print(str) // Output: ABC
} else {
print("Not a valid UTF-8 string")
}
Within the closure, $0 is a UnsafeBufferPointer<CChar> pointing to the array's contiguous storage. From that a Swift String can be created.
Alternatively, if you prefer the input as unsigned bytes:
let utf8 : [UInt8] = [0xE2, 0x82, 0xAC, 0]
if let str = utf8.withUnsafeBufferPointer( { String.fromCString(UnsafePointer($0.baseAddress)) }) {
print(str) // Output: €
} else {
print("Not a valid UTF-8 string")
}
If you're starting with a raw buffer, such as from the Data object returned from a file handle (in this case, taken from a Pipe object):
let data = pipe.fileHandleForReading.readDataToEndOfFile()
var unsafePointer = UnsafeMutablePointer<UInt8>.allocate(capacity: data.count)
data.copyBytes(to: unsafePointer, count: data.count)
let output = String(cString: unsafePointer)
There is Swift 3.0 version of Martin R answer
public class UTF8Encoding {
public static func encode(bytes: Array<UInt8>) -> String {
var encodedString = ""
var decoder = UTF8()
var generator = bytes.makeIterator()
var finished: Bool = false
repeat {
let decodingResult = decoder.decode(&generator)
switch decodingResult {
case .scalarValue(let char):
encodedString += "\(char)"
case .emptyInput:
finished = true
case .error:
finished = true
}
} while (!finished)
return encodedString
}
public static func decode(str: String) -> Array<UInt8> {
var decodedBytes = Array<UInt8>()
for b in str.utf8 {
decodedBytes.append(b)
}
return decodedBytes
}
}
If you want show emoji from UTF-8 string, just user convertEmojiCodesToString method below. It is working properly for strings like "U+1F52B" (emoji) or "U+1F1E6 U+1F1F1" (country flag emoji)
class EmojiConverter {
static func convertEmojiCodesToString(_ emojiCodesString: String) -> String {
let emojies = emojiCodesString.components(separatedBy: " ")
var resultString = ""
for emoji in emojies {
var formattedCode = emoji
formattedCode.slice(from: 2, to: emoji.length)
formattedCode = formattedCode.lowercased()
if let charCode = UInt32(formattedCode, radix: 16),
let unicode = UnicodeScalar(charCode) {
let str = String(unicode)
resultString += "\(str)"
}
}
return resultString
}
}