Swift: split a string into words but keep the punctuation as separate elements

I need to split a string into an array of substrings in Swift, but the punctuation characters ",.?!" must be kept as separate substrings.
var sentence = "What is your name?" into
var words = ["What", "is", "your", "name", "?"]
I know I can use this to split on white space, but I need each of the characters ".,?!" to become its own element in the words array. How can I do that?
var words = sentence.components(separatedBy: " ")
I only get ["What", "is", "your", "name?"]
I need to separate the ? at the end word, and make words array like this:
var words = ["What", "is", "your", "name", "?"]

You can enumerate your substrings in range using .byWords options, append the substring to your words array, get the substring range upperBound and the enclosed range upperBound, remove the white spaces on the resulting substring and append it to the words array:
import Foundation
let sentence = "What is your name?"
var words: [String] = []
// Walk the sentence word by word. `range` covers the word itself, while
// `enclosingRange` extends past it, so the text between the two upper bounds
// is whatever follows the word (whitespace and/or punctuation).
sentence.enumerateSubstrings(in: sentence.startIndex..., options: .byWords) { substring, range, enclosingRange, _ in
    // Bind instead of force-unwrapping: a missing substring is skipped rather than crashing.
    guard let word = substring else { return }
    words.append(word)
    let trailing = sentence[range.upperBound..<enclosingRange.upperBound]
    // Drop the whitespace; every remaining run of characters becomes its own entry.
    words += trailing
        .split(whereSeparator: { $0.isWhitespace })
        .map(String.init)
}
print(words) // "["What", "is", "your", "name", "?"]\n"
You can also use a regular expression to replace the punctuation by the same punctuation preceded by a space before splitting your words by whitespaces:
let sentence = "What is your name?"
// Put a space in front of each listed punctuation mark so that a plain
// whitespace split turns it into its own element.
let spacedSentence = sentence.replacingOccurrences(
    of: "[.,;:?!]",
    with: " $0",
    options: .regularExpression
)
let words = spacedSentence.split(whereSeparator: { $0.isWhitespace })
print(words) // "["What", "is", "your", "name", "?"]\n"
Swift native approach:
var sentence = "What is your name?"
// Collect the positions of all punctuation characters up front.
let punctuationIndices = sentence.indices.filter { sentence[$0].isPunctuation }
// Insert from the back towards the front so earlier positions are visited after later ones.
for index in punctuationIndices.reversed() {
    sentence.insert(" ", at: index)
}
let words = sentence.split(whereSeparator: { $0.isWhitespace })
for word in words {
    print(word)
}
This will print:
What
is
your
name
?

This function will split on whitespace and also include each punctuation character as a separate string. Apostrophes are treated as part of a word, so "can't" and "it's" are kept together as a single string. This function will also handle double spaces and tabs.
/// Splits a sentence into words and stand-alone symbol characters.
///
/// Whitespace separates tokens and is never part of the output. Letters and
/// apostrophes are accumulated into words. Every other character (punctuation,
/// digits, symbols, ...) ends the current word and is emitted as its own
/// one-character string.
///
/// - Parameter sentence: The text to split.
/// - Returns: The tokens in their original order; empty for empty or
///   whitespace-only input.
func splitSentence(sentence: String) -> [String] {
    var result: [String] = []
    var word = ""

    // Moves the word collected so far (if any) into the result.
    func flushWord() {
        guard !word.isEmpty else { return }
        result.append(word)
        word = ""
    }

    // Iterate the characters directly: offsetting from startIndex on every
    // step would rescan the string each time and make the loop quadratic.
    for c in sentence {
        if c.isWhitespace {
            flushWord()
        } else if c.isLetter || c == "'" {
            word.append(c)
        } else {
            flushWord()
            result.append(String(c))
        }
    }
    flushWord()
    return result
}
Here is some testing code:
/// Runs `splitSentence` on `sentence`, prints every produced token and reports
/// any difference from the expected `answer`.
///
/// - Parameters:
///   - sentence: The input handed to `splitSentence(sentence:)`.
///   - answer: The tokens the split is expected to produce.
func test(_ sentence: String, _ answer: [String]) {
    print("--------------------------------")
    print("sentence=" + sentence)
    let result: [String] = splitSentence(sentence: sentence)
    for s in result {
        print("s={" + s + "}")
    }
    if answer.count != result.count {
        print("#### Answer count mismatch")
    }
    // Compare pairwise with zip so a result shorter than the answer is
    // reported by the count check above instead of crashing on `result[i]`.
    for (expected, actual) in zip(answer, result) where expected != actual {
        print("### Mismatch: {" + expected + "} != {" + actual + "}")
    }
}
/// Feeds a fixed table of sentences and their expected tokens to `test(_:_:)`,
/// in the order listed.
func runTests() {
    let cases: [(String, [String])] = [
        ("", []),
        (" ", []),
        (" ", []),
        (" a", ["a"]),
        ("a ", ["a"]),
        (" a", ["a"]),
        (" a ", ["a"]),
        ("a ", ["a"]),
        ("aa", ["aa"]),
        ("a a", ["a", "a"]),
        ("?", ["?"]),
        ("a?", ["a", "?"]),
        ("???", ["?", "?", "?"]),
        ("What is your name?", [ "What", "is", "your", "name", "?" ]),
        ("What is your name? ", [ "What", "is", "your", "name", "?" ]),
        ("La niña es linda.", [ "La", "niña", "es", "linda", "."]),
        ("ñññ ñ ññ ñ", [ "ñññ", "ñ", "ññ", "ñ" ]),
        ("It's the 'best'.", [ "It's", "the", "'best'", "." ]),
        ("¿Cómo te llamas?", [ "¿", "Cómo", "te", "llamas", "?" ]),
        ("你好吗?", [ "你好吗", "?" ]),
    ]
    for (sentence, answer) in cases {
        test(sentence, answer)
    }
}

// Usage example: split on spaces first, then split every word around its
// punctuation characters via `split(includingSeparators:)`, so each
// punctuation mark ends up as its own string.
XCTAssertEqual(
"¿What is your name? My name is 🐱, and I am a cat!"
.split(separator: " ")
.flatMap { $0.split(includingSeparators: \.isPunctuation) }
// Turn every `Spliteration` into an array of characters ...
.map(Array.init)
// ... and every character array back into a string.
.map { String($0) },
[ "¿", "What", "is", "your", "name", "?",
"My", "name", "is", "🐱", ",", "and", "I", "am", "a", "cat", "!"
]
)
/// One item produced while splitting a sequence without discarding its separators.
public enum Spliteration<Element> {
    /// A single element that acted as a separator.
    case separator(Element)
    /// A run of elements found between separators.
    case subSequence([Element])
}

public extension Array {
    /// Flattens a `Spliteration` into an array: a separator becomes a
    /// one-element array, a sub-sequence is taken over unchanged.
    init(_ spliteration: Spliteration<Element>) {
        switch spliteration {
        case let .subSequence(elements):
            self.init(elements)
        case let .separator(element):
            self.init(CollectionOfOne(element))
        }
    }
}
public extension Sequence {
    /// The first element of the sequence.
    /// - Note: `nil` if the sequence is empty.
    var first: Element? {
        var iterator = makeIterator()
        return iterator.next()
    }

    /// Splits the sequence without discarding the separators.
    ///
    /// Each element for which `getIsSeparator` returns `true` is produced as
    /// `.separator`; each non-empty run of other elements is produced as
    /// `.subSequence`. Items are computed on demand as the result is iterated.
    ///
    /// - Parameter getIsSeparator: Returns `true` for elements that separate runs.
    /// - Returns: A sequence of separators and sub-sequences in source order.
    func split(includingSeparators getIsSeparator: @escaping (Element) -> Bool)
    -> AnySequence< Spliteration<Element> > {
        // Holds a separator that has been pulled from the source but not yet
        // handed out to the caller.
        var separatorFromPrefixIteration: Element?

        // Called for every element pulled from the source; remembers it when
        // it is a separator and clears the slot otherwise.
        func process(next: Element?) -> Void {
            separatorFromPrefixIteration =
                next.map(getIsSeparator) == true
                ? next
                : nil
        }

        // A leading separator is recorded here and skipped in the iterator below.
        process(next: first)
        let prefixIterator = AnyIterator(
            dropFirst(
                separatorFromPrefixIteration == nil
                    ? 0
                    : 1
            ),
            processNext: process
        )

        // Hands out the remembered separator (if any) exactly once.
        func takePendingSeparator() -> Spliteration<Element>? {
            guard let separator = separatorFromPrefixIteration else { return nil }
            separatorFromPrefixIteration = nil
            return .separator(separator)
        }

        return .init { () -> Spliteration<Element>? in
            if let separator = takePendingSeparator() {
                return separator
            }
            // `prefix(while:)` also consumes the element that ends the run;
            // `process` has stored it if it was a separator.
            let run = prefixIterator.prefix { !getIsSeparator($0) }
            guard run.isEmpty else {
                return .subSequence(run)
            }
            // An empty run means either the source is exhausted (nothing
            // pending, so iteration ends) or two separators were adjacent.
            // In the latter case the second one must still be emitted
            // instead of ending the sequence early.
            return takePendingSeparator()
        }
    }
}
public extension AnyIterator {
    /// Creates an iterator from an initial state and a closure that advances it.
    ///
    /// Use when `AnyIterator` is required / `UnfoldSequence` can't be used.
    /// - Parameters:
    ///   - state: The initial state; the iterator keeps its own mutable copy.
    ///   - getNext: Produces the next element from the state, or `nil` to finish.
    init<State>(
        state: State,
        _ getNext: @escaping (inout State) -> Element?
    ) {
        var state = state
        self.init { getNext(&state) }
    }

    /// Process iterations with a closure.
    /// - Parameters:
    ///   - sequence: The sequence whose elements are forwarded unchanged.
    ///   - processNext: Executes with every iteration, including the final
    ///     `nil` that ends the iteration.
    init<Sequence: Swift.Sequence>(
        _ sequence: Sequence,
        processNext: @escaping (Element?) -> Void
    ) where Sequence.Element == Element {
        self.init( state: sequence.makeIterator() ) { iterator -> Element? in
            let next = iterator.next()
            processNext(next)
            return next
        }
    }
}
public extension AnySequence {
    /// Creates a sequence whose iterator is driven by a single closure.
    ///
    /// Use when `AnySequence` is required / `AnyIterator` can't be used.
    /// - Parameter getNext: Executed as the `next` method of this sequence's iterator.
    init(_ getNext: @escaping () -> Element?) {
        self.init( Iterator(getNext) )
    }
}
public extension Optional {
    /// Creates an optional that is `nil` when a condition holds for the value
    /// and wraps the value otherwise.
    /// - Parameters:
    ///   - wrapped: A non-optional value.
    ///   - getIsNil: The condition that will result in `nil`.
    /// - Throws: Rethrows whatever `getIsNil` throws.
    init(
        _ wrapped: Wrapped,
        nilWhen getIsNil: (Wrapped) throws -> Bool
    ) rethrows {
        if try getIsNil(wrapped) {
            self = .none
        } else {
            self = .some(wrapped)
        }
    }
}

This answer is not written in Swift, but I believe the algorithm can be reproduced in any language.
I implemented it in Java, using only the standard Java libraries and no external ones.
/**
 * Demo: collects every special character found in a hard-coded sentence,
 * removes those characters from the sentence, appends the collected
 * characters as one trailing word, and prints the space-separated parts.
 */
private void setSpecialCharsAsLastArrayItem() {
String name = "?what is your name?";
// Character class listing the characters treated as "special".
// NOTE(review): the doubled '#' is redundant inside a character class; it may be a garbled "@#" — confirm against the original answer.
String regexCompilation = "[$&+,:;=?##|]";
Pattern regex = Pattern.compile(regexCompilation);
Matcher matcher = regex.matcher(name);
// Concatenate all matched special characters in the order they occur.
StringBuilder regexStr = new StringBuilder();
while (matcher.find()) {
regexStr.append(matcher.group());
}
String stringOfSpecialChars = regexStr.toString();
String stringWithoutSpecialChars = name.replaceAll(regexCompilation, "");
// The special characters are joined into a single trailing token, not one token each.
String finalString = stringWithoutSpecialChars + " "+stringOfSpecialChars;
String[] splitString = finalString.split(" ");
System.out.println(Arrays.toString(splitString));
}
will print [what, is, your, name, ??]

Here is the solution (Swift 5):
// Pad the question mark with a leading space so that splitting on spaces
// yields it as a separate element. Only "?" is handled here.
let sentence = "What is your name?"
    .replacingOccurrences(of: "?", with: " ?")
let words = sentence.split(whereSeparator: { $0 == " " })
print(words)
Output:
["What", "is", "your", "name", "?"]

Related

Reverse Words with exclusion rules like anagrams and numbers

Need some help!
I have a model function in a simple project for reversing words.
The questions are:
First - How can I reverse my words (sentences) to make an anagram? For example, after reversing each word I need to shift its first character forward by two positions.
Example:
Original - Hello world
Reversed with anagrams: - lloeH lrdow
Second - How can I reverse my words (sentences) without numbers inside it?
Example:
Original - Hello world 1234
Reversed without numbers - olleH dlrow 1234
How can I do it in my code?
import Foundation
/// Reverses the characters of every space-separated word in a text.
class ReverseWords {
    /// Returns `textField` with surrounding whitespace and newlines trimmed
    /// and each space-separated word reversed in place.
    /// - Parameter textField: The text to transform.
    /// - Returns: The transformed text, or `""` for empty input.
    public func reverse(textField: String) -> String {
        guard !textField.isEmpty else {
            return ""
        }
        let trimmed = textField.trimmingCharacters(in: .whitespacesAndNewlines)
        var reversedWords: [String] = []
        for word in trimmed.components(separatedBy: " ") {
            reversedWords.append(String(word.reversed()))
        }
        return reversedWords.joined(separator: " ")
    }
}
Check this out.
/// Reverses and shifts every word of a sentence, leaving words that contain
/// a number character untouched.
/// - Parameters:
///   - sentence: The text to transform; surrounding whitespace and newlines are trimmed first.
///   - separator: The character that separates words (a space by default).
/// - Returns: The transformed words joined with `separator`.
func anagram(sentence: String, separator: Character = " ") -> String {
    let trimmed = sentence.trimmingCharacters(in: .whitespacesAndNewlines)
    var transformed: [String] = []
    for word in trimmed.split(separator: separator) {
        let containsNumber = word.contains(where: { $0.isNumber })
        transformed.append(containsNumber ? String(word) : String(word).reverseAndShift())
    }
    return transformed.joined(separator: String(separator))
}
extension String {
    /// Reverses the string, then moves the first character of the reversed
    /// string forward to position `offset`.
    ///
    /// When `offset` is negative or not smaller than the number of characters,
    /// the plain reversed string is returned.
    ///
    /// - Parameter offset: Target position of the first reversed character (default 2).
    /// - Returns: The reversed and shifted string.
    func reverseAndShift(by offset: Int = 2) -> String {
        var characters = Array(self.reversed())
        // `insert(_:at:)` traps on an out-of-range position, so reject
        // negative and too-large offsets (this also covers the empty string).
        guard offset >= 0, offset < characters.count else {
            return String(characters)
        }
        let first = characters.removeFirst()
        characters.insert(first, at: offset)
        return String(characters)
    }
}
print(anagram(sentence: "Hello world")) -> lloeH lrdow
print(anagram(sentence: "Hello world 1234")) -> lloeH lrdow 1234
It’s helpful to break your problem into independently testable functions.
Here are two functions you should find useful:
/// Moves the first character of `str` two positions forward.
/// Strings with two or fewer characters are returned unchanged.
func shift2(_ str: String) -> String {
    guard str.count > 2 else { return str }
    var characters = Array(str)
    // Taking the head off and re-inserting it at index 2 puts it right
    // behind the original second and third characters.
    let head = characters.removeFirst()
    characters.insert(head, at: 2)
    return String(characters)
}
/// Reverses `str`, unless it contains any of the digits 0-9, in which case
/// it is returned unchanged.
func reverseWord(_ str: String) -> String {
    let digits: Set<Character> = Set("0123456789")
    let containsDigit = str.contains(where: { digits.contains($0) })
    return containsDigit ? str : String(str.reversed())
}
Examples
print(shift2("abcde"))
print(shift2("abc"))
print(shift2("ab"))
bcade
bca
ab
print(reverseWord("Hello"))
print(reverseWord("Hello1"))
print(reverseWord("12345"))
olleH
Hello1
12345

Turn Swift function into extension for reuse

I would like to remove specific characters from a string - instead of reentering the following across various string in my application, I would like to create an extension to easily reference across the code.
How can this be turned into an extension?
var string = "11224B"
let removeCharacters: Set<Character> = [" ", ";", ".", "!", "/"]
string.removeAll(where: { removeCharacters.contains($0) })
print(string)
What you need is to extend the protocol which requires you to implement removeAll(where:) method, in this case RangeReplaceableCollection and constrain Self to StringProtocol:
extension RangeReplaceableCollection where Self: StringProtocol {
    /// Removes, in place, every character that is a member of `characters`.
    mutating func remove(characters: Set<Element>) {
        self = filter { !characters.contains($0) }
    }
}
var string = "1/1!2.2;4 B"
string.remove(characters: [" ", ";", ".", "!", "/"])
print(string) // "11224B\n"
And the non mutating method as well but using filter instead. You just need to return Self:
extension RangeReplaceableCollection where Self: StringProtocol {
    /// Returns a copy with every character that is a member of `characters` removed.
    func removing(characters: Set<Element>) -> Self {
        var remaining = self
        remaining.removeAll { characters.contains($0) }
        return remaining
    }
}
let string = "1/1!2.2;4 B"
string.removing(characters: [" ", ";", ".", "!", "/"]) // "11224B\n"
You can also make your method generic and allow any sequence type which its element is equal to the collection element type:
extension RangeReplaceableCollection where Self: StringProtocol {
    /// Removes, in place, every character contained in `characters`.
    /// - Parameter characters: Any sequence of characters, e.g. a `Set` or a `String`.
    mutating func remove<S: Sequence>(characters: S) where S.Element == Element {
        self = filter { !characters.contains($0) }
    }

    /// Returns a copy with every character contained in `characters` removed.
    /// - Parameter characters: Any sequence of characters, e.g. a `Set` or a `String`.
    func removing<S: Sequence>(characters: S) -> Self where S.Element == Element {
        var remaining = self
        remaining.removeAll { characters.contains($0) }
        return remaining
    }
}
var string = "1/1!2.2;4 B"
let characters: Set<Character> = [" ", ";", ".", "!", "/"]
string.remove(characters: characters)
string // "11224B\n"
let string = "1/1!2.2;4 B"
let charactersString = " ;.!/"
string.removing(characters: charactersString) // "11224B\n"
Extension:
extension String {
    /// Returns a copy of the string without the Unicode scalars contained in `characterSet`.
    func removeCharacters(from characterSet: CharacterSet) -> String {
        var kept = String.UnicodeScalarView()
        for scalar in unicodeScalars where !characterSet.contains(scalar) {
            kept.append(scalar)
        }
        return String(kept)
    }
}
Usage:
var string1 = "1122 ;4B"
print(string1.removeCharacters(from: [" ", ";", ".", "!", "/"]))
Use removeCharacters as default parameter:
extension String {
    /// Returns a copy of the string without the Unicode scalars contained in `characterSet`.
    /// - Parameter characterSet: The scalars to drop; defaults to space, `;`, `.`, `!` and `/`.
    func removeCharacters(from characterSet: CharacterSet = [" ", ";", ".", "!", "/"]) -> String {
        var scalars = unicodeScalars
        scalars.removeAll { characterSet.contains($0) }
        return String(scalars)
    }
}
var string1 = "1122 ;4B"
print(string1.removeCharacters())

Converting string to Bool and testing for IF statement in Swift

So I am trying to take the first character of a string (fullName) and test whether it matches any vowel, lower- or uppercase. For some reason, when I use .startIndex I can only test one letter at a time with an if statement.
Is there a way to test all vowels at once? (I tried || in between each and it gave me the error "cannot convert string to Bool".) Appreciate any help.
func lyricsForName(lyricsTemplate: String, fullName: String) -> String {
let shortName = shortNameFromName(name: fullName)
let index = fullName[fullName.startIndex]
if index== ("a","A"){
let lyrics = lyricsTemplate
.replacingOccurrences(of:"<FULL_NAME>", with: fullName)
.replacingOccurrences (of:"<SHORT_NAME>", with: fullName )
return lyrics
}else{
let lyrics = lyricsTemplate
.replacingOccurrences(of:"<FULL_NAME>", with: fullName)
.replacingOccurrences (of:"<SHORT_NAME>", with: shortName )
return lyrics
You can do it as previously answered, but this is a more Swifty way using the available APIs:
extension String {
    /// `true` when the string has at least three characters and the first
    /// three all belong to `CharacterSet.vowels`.
    var firstThreeLettersAreVowels: Bool {
        // `String` is itself a collection of characters; the `characters`
        // view and `substring(to:)` are no longer needed.
        guard count >= 3 else {
            return false
        }
        let firstThreeLetters = String(prefix(3))
        let isAllVowels = CharacterSet(charactersIn: firstThreeLetters).isSubset(of: CharacterSet.vowels)
        return isAllVowels
    }

    /// `true` when every scalar of the string is in `CharacterSet.lowercaseLetters`.
    var isLower: Bool {
        return CharacterSet(charactersIn: self).isSubset(of: CharacterSet.lowercaseLetters)
    }

    /// `true` when no scalar of the string is in `CharacterSet.lowercaseLetters`.
    /// Note that this only checks for the absence of lowercase letters, so
    /// digits and punctuation also satisfy it.
    var isUpper: Bool {
        return CharacterSet(charactersIn: self).isDisjoint(with: CharacterSet.lowercaseLetters)
    }
}
extension CharacterSet {
    /// The upper- and lowercase letters A, E, I, O, U and Y.
    static var vowels: CharacterSet {
        let letters = "AEIOUYaeiouy"
        return CharacterSet(charactersIn: letters)
    }
}
Or the one liner (without length check) is
extension String {
    /// `true` when the first three characters are all vowels (A, E, I, O, U, Y
    /// in either case). Strings shorter than three characters yield `false`
    /// instead of crashing on an out-of-range index.
    var firstThreeLettersAreVowels: Bool {
        return count >= 3 && CharacterSet(charactersIn: String(prefix(3)))
            .isSubset(of: CharacterSet(charactersIn: "AEIOUYaeiouy"))
    }
}
This allows you rely on existing APIs rather than trying to do the work yourself, and it should be pretty fast.
Here is one way to do it. You can use an array of vowels and then test if the array contains the first character:
let fullName = "Albert"
// A String is a collection of characters, so it can be searched directly;
// the old `characters` view is not needed.
let vowels = "aeiouAEIOU"
// `first` is nil for an empty name, so nothing is printed in that case.
if let first = fullName.first, vowels.contains(first) {
    print("\(fullName) starts with a vowel")
}
Albert starts with a vowel
Note: Using fullName.characters.first is safer than fullName[fullName.startIndex] because the latter will crash for an empty String.
all kudos please to PEEJWEEJ answer
the characterset was his idea :)
i made it only more general
import Foundation
extension String {
    /// Tells whether the first `count` characters all belong to `characterSet`.
    ///
    /// - Parameters:
    ///   - count: How many leading characters to check.
    ///   - characterSet: The set the leading characters must belong to.
    /// - Returns: `false` when `count` is negative or the string has fewer
    ///   than `count` characters; otherwise whether all checked characters
    ///   are members of `characterSet`.
    func isFirstLetters(count: Int, of characterSet: CharacterSet) -> Bool {
        // `prefix(_:)` traps on a negative length, so reject it up front.
        guard count >= 0, self.count >= count else {
            return false
        }
        let firstLetters = String(prefix(count))
        return CharacterSet(charactersIn: firstLetters).isSubset(of: characterSet)
    }
}
extension CharacterSet {
    /// The vowels A, E, I, O, U and Y in upper and lower case.
    static var vowels: CharacterSet {
        let vowelLetters = "AEIOUYaeiouy"
        let set = CharacterSet(charactersIn: vowelLetters)
        return set
    }
}
some Test cases:
"abcDEf".isFirstLetters(count: 3, of: .vowels) // false
"aioDEf".isFirstLetters(count: 3, of: .vowels) // true
"abcDEf".isFirstLetters(count: 1, of: .vowels) // true
"bbcDEf".isFirstLetters(count: 1, of: .vowels) // false
"ibcDEf".isFirstLetters(count: 1, of: .vowels) // true
"ABcdef".isFirstLetters(count: 3, of: .uppercaseLetters) // false
"ABDdef".isFirstLetters(count: 3, of: .uppercaseLetters) // true
"abBABcdef".isFirstLetters(count: 3, of: .lowercaseLetters) // false
"abbABDdef".isFirstLetters(count: 3, of: .lowercaseLetters) // true
"ABD".isFirstLetters(count: 3, of: CharacterSet(charactersIn: "A"..."C")) // false
"ABD".isFirstLetters(count: 3, of: CharacterSet(charactersIn: "A"..."D")) // true
debug info:
extension CharacterSet {
    /// All members of the set as characters, in ascending code point order.
    /// Only the planes (0 through 16) that report having members are scanned.
    var characters: [Character] {
        let occupiedPlanes = (UInt8(0)...16).filter { hasMember(inPlane: $0) }
        return occupiedPlanes.flatMap { plane -> [Character] in
            // Each plane spans 0x10000 consecutive code points.
            let lowerBound = UInt32(plane) << 16
            let upperBound = UInt32(plane + 1) << 16
            return (lowerBound..<upperBound).compactMap { value -> Character? in
                // Values that are not valid Unicode scalars are skipped.
                guard let scalar = UnicodeScalar(value), contains(scalar) else {
                    return nil
                }
                return Character(scalar)
            }
        }
    }
}
print(CharacterSet.uppercaseLetters.description)
//<Foundation._SwiftNSCharacterSet: 0x6000000271a0>
print(String(CharacterSet.vowels.characters))
//AEIOUYaeiouy
print(CharacterSet.vowels.characters)
// ["A", "E", "I", "O", "U", "Y", "a", "e", "i", "o", "u", "y"]

Split string by components and keep components in place

Unlike string.components(separatedBy: ...) I want to keep the separators in place in the resulting array. Code is more explanatory
let input = "foo&bar|hello"
let output = string.tokenize(splitMarks: ["&", "|"])
let desiredResult = ["foo", "&", "bar", "|", "hello"]
Is there any function in the standard library which does this? If not how can I implement such a function?
For that you need to loop through the String and check whether each character is one of the split marks. You can write a String extension for that, like this:
extension String {
    /// Splits the string at the given single-character marks and keeps the
    /// marks as separate elements of the result.
    ///
    /// - Parameter splitMarks: The marks to split at. Each character of the
    ///   string is compared against this set as a one-character string.
    /// - Returns: The text runs and the marks in their original order; empty
    ///   runs are omitted.
    func stringTokens(splitMarks: Set<String>) -> [String] {
        var current = ""
        var tokens: [String] = []
        // Iterate the string directly; the `characters` view no longer exists.
        for ch in self {
            let piece = String(ch)
            if splitMarks.contains(piece) {
                if !current.isEmpty {
                    tokens.append(current)
                }
                tokens.append(piece)
                current = ""
            } else {
                current.append(ch)
            }
        }
        if !current.isEmpty {
            tokens.append(current)
        }
        return tokens
    }
}
Now you can call this function like this way.
let input = "foo&bar|hello"
print(input.stringTokens(splitMarks: ["&", "|"]))
Output
["foo", "&", "bar", "|", "hello"]
You can use rangeOfCharacter(from: CharacterSet, ...) in a loop to
find the next occurrence of a split mark in the string, and then
append both the preceding part and the separator to an array:
extension String {
    /// Splits the string at every character contained in `splitMarks`,
    /// keeping the split marks as separate elements.
    /// - Parameter splitMarks: The characters to split at.
    /// - Returns: The non-empty text runs and the split marks, in order.
    func tokenize(splitMarks: String) -> [Substring] {
        let separators = CharacterSet(charactersIn: splitMarks)
        var tokens: [Substring] = []
        var cursor = startIndex
        // Keep searching for the next split mark from the current position.
        while let markRange = rangeOfCharacter(from: separators, range: cursor..<endIndex) {
            // Text between the previous position and the mark, if any.
            let preceding = self[cursor..<markRange.lowerBound]
            if !preceding.isEmpty {
                tokens.append(preceding)
            }
            // The mark itself.
            tokens.append(self[markRange])
            cursor = markRange.upperBound
        }
        // Whatever follows the last mark.
        let remainder = self[cursor..<endIndex]
        if !remainder.isEmpty {
            tokens.append(remainder)
        }
        return tokens
    }
}
Example:
let input = "foo&bar|hello"
let output = input.tokenize(splitMarks: "&|")
print(output)
// ["foo", "&", "bar", "|", "hello"]

Split a String without removing the delimiter in Swift

This might be a duplicate. I couldn't find the answer in Swift, so I am not sure.
componentsSeparatedByCharactersInSet removes the delimiter. If you separate by only one possible character it is easy to add it back. But what when you have a set?
Is there another method to split?
Swift 3 and 4 Versions
extension Collection {
    /// Splits the collection after every element for which `isSplit` returns
    /// `true`; the matching element stays at the end of its slice.
    ///
    /// - Parameter isSplit: Returns `true` for elements that end a slice.
    /// - Returns: The slices in order. Elements after the last match form a
    ///   final slice; no empty trailing slice is added.
    /// - Throws: Rethrows any error thrown by `isSplit`.
    func splitAt(isSplit: (Element) throws -> Bool) rethrows -> [SubSequence] {
        var sliceStart = startIndex
        var result: [SubSequence] = []
        // A plain loop replaces the deprecated optional-returning `flatMap`.
        for i in indices {
            guard try isSplit(self[i]) else {
                continue
            }
            result.append(self[sliceStart...i])
            sliceStart = index(after: i)
        }
        if sliceStart != endIndex {
            result.append(suffix(from: sliceStart))
        }
        return result
    }
}
Thanks to Oisdk for getting me thinking.
This method works on CollectionTypes, rather than Strings, but it should be easy enough to adapt:
// Legacy Swift 2 syntax (`CollectionType`, `Generator`, `suffixFrom`).
// NOTE(review): `#noescape` is presumably a garbled `@noescape` attribute — confirm against the original answer.
extension CollectionType {
/// Splits the collection in front of every element for which `isSplit`
/// returns true, so each matching element starts the following slice.
/// The remainder from the last match onwards is always appended.
func splitAt(#noescape isSplit: Generator.Element throws -> Bool) rethrows -> [SubSequence] {
// Start of the slice that is currently being collected.
var p = startIndex
return try indices
.filter { i in try isSplit(self[i]) }
.map { i in
// Move the slice start to the matching index once the slice has been returned.
defer { p = i }
return self[p..<i]
} + [suffixFrom(p)]
}
}
// Legacy Swift 2 syntax (`CollectionType`, `Generator`).
extension CollectionType where Generator.Element : Equatable {
/// Convenience overload: splits at every element equal to `splitter` by
/// forwarding to the closure-based `splitAt`.
func splitAt(splitter: Generator.Element) -> [SubSequence] {
return splitAt { el in el == splitter }
}
}
You could use it like this:
let sentence = "Hello, my name is oisdk. This should split: but only at punctuation!"
let puncSet = Set("!.,:".characters)
sentence
.characters
.splitAt(puncSet.contains)
.map(String.init)
// ["Hello", ", my name is oisdk", ". This should split", ": but only at punctuation", "!"]
Or, this version, which uses a for-loop, and splits after the delimiter:
// Legacy Swift 2 syntax (`CollectionType`, `Generator`, `successor()`, `suffixFrom`).
// NOTE(review): `#noescape` is presumably a garbled `@noescape` attribute — confirm against the original answer.
extension CollectionType {
/// Splits the collection after every element for which `isSplit` returns
/// true, so each matching element ends its slice.
func splitAt(#noescape isSplit: Generator.Element throws -> Bool) rethrows -> [SubSequence] {
// Start of the slice that is currently being collected.
var p = startIndex
var result: [SubSequence] = []
for i in indices where try isSplit(self[i]) {
// The closed range keeps the matching element inside the slice.
result.append(self[p...i])
p = i.successor()
}
// Append the remainder only when something follows the last match.
if p != endIndex { result.append(suffixFrom(p)) }
return result
}
}
// Legacy Swift 2 syntax (`CollectionType`, `Generator`).
extension CollectionType where Generator.Element : Equatable {
/// Convenience overload: splits at every element equal to `splitter` by
/// forwarding to the closure-based `splitAt`.
func splitAt(splitter: Generator.Element) -> [SubSequence] {
return splitAt { el in el == splitter }
}
}
let sentence = "Hello, my name is oisdk. This should split: but only at punctuation!"
let puncSet = Set("!.,:".characters)
sentence
.characters
.splitAt(puncSet.contains)
.map(String.init)
// ["Hello,", " my name is oisdk.", " This should split:", " but only at punctuation!"]
Or, if you wanted to get the most Swift features into one function (defer, throws, a Protocol extension, an evil flatMap, guard, and Optionals):
// Legacy Swift 2 syntax (`CollectionType`, `Generator`, `successor()`, `suffixFrom`).
// NOTE(review): `#noescape` is presumably a garbled `@noescape` attribute — confirm against the original answer.
extension CollectionType {
/// Splits the collection after every element for which `isSplit` returns
/// true, so each matching element ends its slice.
func splitAt(#noescape isSplit: Generator.Element throws -> Bool) rethrows -> [SubSequence] {
// Start of the slice that is currently being collected.
var p = startIndex
var result: [SubSequence] = try indices.flatMap { i in
// Non-matching indices return nil and contribute no slice.
guard try isSplit(self[i]) else { return nil }
// Advance the slice start past the match once the slice has been returned.
defer { p = i.successor() }
return self[p...i]
}
// Append the remainder only when something follows the last match.
if p != endIndex { result.append(suffixFrom(p)) }
return result
}
}
I came here looking for an answer to this question. Didn't find what I was looking for and ended up building this by repeated calls to .split(...) It isn't elegant but you can choose which delimiters are preserved and which aren't. There's probably a way to avoid the String <--> Substring conversions, anyone know?
// Tokenizes `input` by repeatedly splitting off the text in front of the next
// delimiter. Brackets are kept as tokens of their own; spaces, newlines,
// commas and periods are dropped.
var input = """
{All those moments will be (lost in time)},
like tears [in rain](. ([(Time to)] die))
"""
// Set by the `whereSeparator` closure to the delimiter that ended the current
// split; a space marks a delimiter that must not be emitted.
var separator: Character = "!"
var output: [String] = []
repeat {
// Split at the first delimiter only: tokens[0] is the text before it and
// tokens[1] (when present) is everything after it.
let tokens = input.split(
maxSplits: 1,
omittingEmptySubsequences: false,
whereSeparator: {
switch $0 {
case "{", "}", "(", ")", "[", "]": // preserve
separator = $0; return true
case " ", "\n", ",", ".": // omit
separator = " "; return true
default:
return false
}
}
)
// Skip the empty piece produced when the input starts with a delimiter.
if tokens[0] != "" {
output.append(String(tokens[0]))
}
// Only one piece means no delimiter was found: the input is used up.
guard tokens.count == 2 else { break }
if separator != " " {
output.append(String(separator))
}
// Continue with the text after the delimiter.
input = String(tokens[1])
} while true
for token in output { print("\(token)") }
In the case above, the selectors are not in actual sets. I didn't need that, but if you do, simply make these declarations,
let preservedDelimiters: Set<Character> = [ "{", "}", "(", ")", "[", "]" ]
let omittedDelimiters: Set<Character> = [ " ", "\n", ",", "." ]
and replace the whereSeparator function with:
whereSeparator: {
if preservedDelimiters.contains($0) {
separator = $0
return true
} else if omittedDelimiters.contains($0) {
separator = " "
return true
} else {
return false
}
}