Find String from String using NSRegularExpression Swift - swift

I want to fetch url of images from the String using NSRegularExpression.
func findURlUsingExpression(urlString: String){
do{
let expression = try NSRegularExpression(pattern: "\\b(http|https)\\S*(jpg|png)\\b", options: NSRegularExpressionOptions.CaseInsensitive)
let arrMatches = expression.matchesInString(urlString, options: NSMatchingOptions(rawValue: 0), range: NSMakeRange(0, urlString.characters.count))
for match in arrMatches{
let matchText = urlString.substringWithRange(Range(urlString.startIndex.advancedBy(match.range.location) ..< urlString.startIndex.advancedBy(match.range.location + match.range.length)))
print(matchText)
}
}catch let error as NSError{
print(error.localizedDescription)
}
}
It works with just the simple string but not with the HTML String.
Working Example:
let tempString = "jhgsfjhgsfhjgajshfgjahksfgjhs http://jhsgdfjhjhggajhdgsf.jpg jahsfgh asdf ajsdghf http://jhsgdfjhjhggajhdgsf.png"
findURlUsingExpression(tempString)
Output:
http://jhsgdfjhjhggajhdgsf.jpg
http://jhsgdfjhjhggajhdgsf.png
But not working with this one: http://www.writeurl.com/text/478sqami3ukuug0r0bdb/i3r86zlza211xpwkdf2m

Don't roll your own regex if you can help it. Easiest and safest way is to use NSDataDetector. By using NSDataDetector you leverage a pre-built, heavily used parsing tool which should already have most of the bugs shaken out of it.
Here is a good article on it: NSData​Detector
NSDataDetector is a subclass of NSRegularExpression, but instead of
matching on an ICU pattern, it detects semi-structured information:
dates, addresses, links, phone numbers and transit information.
import Foundation
let tempString = "jhgsfjhgsfhjgajshfgjahksfgjhs http://example.com/jhsgdfjhjhggajhdgsf.jpg jahsfgh asdf ajsdghf http://example.com/jhsgdfjhjhggajhdgsf.png"
let types: NSTextCheckingType = [.Link]
let detector = try? NSDataDetector(types: types.rawValue)
detector?.enumerateMatchesInString(tempString, options: [], range: NSMakeRange(0, (tempString as NSString).length)) { (result, flags, _) in
if let result = result?.URL {
print(result)
}
}
// => "http://example.com/jhsgdfjhjhggajhdgsf.jpg"
// => "http://example.com/jhsgdfjhjhggajhdgsf.png"
The example is from that site, adapted to search for a link.

Related

NSAttributedString: turn (multiple) #username[userid] mentions into clickable #username links

I am in the process of writing code to display mentions within an NSAttributedString, which need to link out to a user profile. The format of the mentions is #username[userid], which would need to be displayed as simply #username, which is tappable.
I have the code working so far that the username becomes clickable, but I now need to remove the [userid] part, which of course modifies the length of the string so that ranges don't match anymore, etc. Not sure how I can solve this.
import Foundation
import UIKit
let comment = "Hello #kevin[1], #john and #andrew[2]!"
let wholeRange = NSRange(comment.startIndex..<comment.endIndex, in: comment)
let regex = try NSRegularExpression(pattern: #"(#[\w.-#]+)\[(\d+)\]"#)
let attributedString = NSMutableAttributedString(string: comment)
regex.enumerateMatches(in: comment, options: [], range: wholeRange) { match, _, _ in
guard let match = match else {
return
}
let userIdRange = Range(match.range(at: 2), in: comment)!
let userId = comment[userIdRange]
let usernameRange = match.range(at: 1)
attributedString.addAttribute(NSAttributedString.Key.link, value: URL(string: "test://profile/\(userId)")!, range: usernameRange)
}
print(attributedString)
The result right now can be represented like this, when printed:
Hello {
}#kevin{
NSLink = "test://profile/1";
}[1], #john and {
}#andrew{
NSLink = "test://profile/2";
}[2]!{
}
So #kevin and #andrew are links, #john is not (which is expected!), but the user ids are still visible. Surely this is a problem that has been solved before but I can't find any examples, not sure what keywords to search for. There are plenty of questions about detecting usernames/mentions in strings, and even more about making links in NSAttributedString, but that's not the problem I am trying to solve.
How would I turn the #username[userid] mentions into clickable #username links, so that the [userid] part is hidden?
You just need to get all the matching ranges, iterate them in reverse order, add the link to it and then replace the whole range with the name. Something like:
let comment = "Hello #kevin[1], #john and #andrew[2]!"
let attributedString = NSMutableAttributedString(string: comment)
let regex = try NSRegularExpression(pattern: #"(#[\w.-#]+)\[(\d+)\]"#)
var ranges: [(NSRange,NSRange,NSRange)] = []
regex.enumerateMatches(in: comment, range: NSRange(comment.startIndex..., in: comment)) { match, _, _ in
guard let match = match else {
return
}
ranges.append((match.range(at: 0),
match.range(at: 1),
match.range(at: 2)))
}
ranges.reversed().forEach {
let userId = attributedString.attributedSubstring(from: $0.2).string
let username = attributedString.attributedSubstring(from: $0.1).string
attributedString.addAttribute(.link, value: URL(string: "test://profile/\(userId)")!, range: $0.0)
attributedString.replaceCharacters(in: $0.0, with: username)
}
print(attributedString)
This will print
Hello {
}#kevin{
NSLink = "test://profile/1";
}, #john and {
}#andrew{
NSLink = "test://profile/2";
}!{
}
Quickly done:
let comment = "Hello #kevin[1], #john and #andrew[2]!"
let attributedString = NSMutableAttributedString(string: comment)
let wholeRange = NSRange(attributedString.string.startIndex..<attributedString.string.endIndex, in: attributedString.string)
let regex = try NSRegularExpression(pattern: #"(#[\w.-#]+)\[(\d+)\]"#)
let matches = regex.matches(in: attributedString.string, options: [], range: wholeRange)
matches.reversed().forEach { aResult in
let fullMatchRange = Range(aResult.range(at: 0), in: attributedString.string)! //#kevin[1]
let replacementRange = Range(aResult.range(at: 1), in: attributedString.string)! //#kevin
let userIdRange = Range(aResult.range(at: 2), in: attributedString.string)! // 1
let atAuthor = String(attributedString.string[replacementRange])
attributedString.addAttribute(.link,
value: URL(string: "test://profile/\(attributedString.string[userIdRange])")!,
range: NSRange(fullMatchRange, in: attributedString.string))
attributedString.replaceCharacters(in: NSRange(fullMatchRange, in: attributedString.string),
with: atAuthor)
}
print(attributedString)
Output:
Hello {
}#kevin{
NSLink = "test://profile/1";
}, #john and {
}#andrew{
NSLink = "test://profile/2";
}!{
}
What's to see:
I changed the pattern, for easy captures. See the sample in comment in the forEach().
I used matches in reverse order, else the ranges won't be accurate anymore!
I kept playing with attributedString.string instead of comment in case it's "unsync".

regular expressions: simplified form?

I'm finally learning swift. The documentation that I've seen for regex in swift consist of something like the following:
let testString = "hat"
let range = NSRange(location: 0, length: testString.utf16.count)
let regex = try! NSRegularExpression(pattern: "[a-z]at")
let r = regex.firstMatch(in: testString, options: [], range: range) != nil
print("ns-based regex match?: ", r)
Is this the preferred/only way of doing this or is there an updated technique?
It's a bit verbose.
We’d often just use range(of:options:range:locale:) with a .regularExpression:
let testString = "foo hat"
if let range = testString.range(of: "[a-z]at", options: .regularExpression) {
print(testString[range])
}
If you don't need some of the more advanced NSRegularExpression options, the above is bit simpler.

How to find if String really MATCHES regex [duplicate]

How to check whether a WHOLE string can be matches to regex? In Java is method String.matches(regex)
You need to use anchors, ^ (start of string anchor) and $ (end of string anchor), with range(of:options:range:locale:), passing the .regularExpression option:
import Foundation
let phoneNumber = "123-456-789"
let result = phoneNumber.range(of: "^\\d{3}-\\d{3}-\\d{3}$", options: .regularExpression) != nil
print(result)
Or, you may pass an array of options, [.regularExpression, .anchored], where .anchored will anchor the pattern at the start of the string only, and you will be able to omit ^, but still, $ will be required to anchor at the string end:
let result = phoneNumber.range(of: "\\d{3}-\\d{3}-\\d{3}$", options: [.regularExpression, .anchored]) != nil
See the online Swift demo
Also, using NSPredicate with MATCHES is an alternative here:
The left hand expression equals the right hand expression using a regex-style comparison according to ICU v3 (for more details see the ICU User Guide for Regular Expressions).
MATCHES actually anchors the regex match both at the start and end of the string (note this might not work in all Swift 3 builds):
let pattern = "\\d{3}-\\d{3}-\\d{3}"
let predicate = NSPredicate(format: "self MATCHES [c] %#", pattern)
let result = predicate.evaluate(with: "123-456-789")
What you are looking for is range(of:options:range:locale:) then you can then compare the result of range(of:option:) with whole range of comparing string..
Example:
let phoneNumber = "(999) 555-1111"
let wholeRange = phoneNumber.startIndex..<phoneNumber.endIndex
if let match = phoneNumber.range(of: "\\(?\\d{3}\\)?\\s\\d{3}-\\d{4}", options: .regularExpression), wholeRange == match {
print("Valid number")
}
else {
print("Invalid number")
}
//Valid number
Edit: You can also use NSPredicate and compare your string with evaluate(with:) method of its.
let pattern = "^\\(?\\d{3}\\)?\\s\\d{3}-\\d{4}$"
let predicate = NSPredicate(format: "self MATCHES [c] %#", pattern)
if predicate.evaluate(with: "(888) 555-1111") {
print("Valid")
}
else {
print("Invalid")
}
Swift extract regex matches
with little bit of edit
import Foundation
func matches(for regex: String, in text: String) -> Bool {
do {
let regex = try NSRegularExpression(pattern: regex)
let nsString = text as NSString
let results = regex.matches(in: text, range: NSRange(location: 0, length: nsString.length))
return !results.isEmpty
} catch let error {
print("invalid regex: \(error.localizedDescription)")
return false
}
}
Example usage from link above:
let string = "19320"
let matched = matches(for: "^[1-9]\\d*$", in: string)
print(matched) // will match
let string = "a19320"
let matched = matches(for: "^[1-9]\\d*$", in: string)
print(matched) // will not match

Trying to parse HTML in Swift 4 using only the Standard Library

I'm trying to parse some HTML to pull all links that come after any occurrences of the string:
market_listing_row_link" href="
to gather a list of item URL's using only the Swift 4 Standard Library.
What I think I need is a for loop that keeps on checking characters with a condition that once the full string is found, it then starts reading the following item URL into an array until a double quote is reached, stopping and then repeating this process until the end of file. Slightly familiar in C we had access to a function (I think it was fgetc) that did this while advancing a position indicator for the file. Is there any similar way to do this in Swift?
My code so far can only find the first occurrence of the string I'm looking for when there are 10 I need to find.
import Foundation
extension String {
func slice(from: String, to: String) -> String? {
return (range(of: from)?.upperBound).flatMap { substringFrom in
(range(of: to, range: substringFrom..<endIndex)?.lowerBound).map { substringTo in
String(self[substringFrom..<substringTo])
}
}
}
}
let itemListURL = URL(string: "http://steamcommunity.com/market/search?appid=252490")!
let itemListHTML = try String(contentsOf: itemListURL, encoding: .utf8)
let itemURL = URL(string: itemListHTML.slice(from: "market_listing_row_link\" href=\"", to: "\"")!)!
print(itemURL)
// Prints the current first URL found matching: http://steamcommunity.com/market/listings/252490/Wyrm%20Chest
You can use regex to find all string occurrences between two specific strings (check this SO answer) and use the extension method ranges(of:) from this answer to get all ranges of that regex pattern. You just need to pass options .regularExpression to that method.
extension String {
func ranges(of string: String, options: CompareOptions = .literal) -> [Range<Index>] {
var result: [Range<Index>] = []
var start = startIndex
while let range = range(of: string, options: options, range: start..<endIndex) {
result.append(range)
start = range.lowerBound < range.upperBound ? range.upperBound : index(range.lowerBound, offsetBy: 1, limitedBy: endIndex) ?? endIndex
}
return result
}
func slices(from: String, to: String) -> [Substring] {
let pattern = "(?<=" + from + ").*?(?=" + to + ")"
return ranges(of: pattern, options: .regularExpression)
.map{ self[$0] }
}
}
Testing playground
let itemListURL = URL(string: "http://steamcommunity.com/market/search?appid=252490")!
let itemListHTML = try! String(contentsOf: itemListURL, encoding: .utf8)
let result = itemListHTML.slices(from: "market_listing_row_link\" href=\"", to: "\"")
result.forEach({print($0)})
Result
http://steamcommunity.com/market/listings/252490/Night%20Howler%20AK47
http://steamcommunity.com/market/listings/252490/Hellcat%20SAR
http://steamcommunity.com/market/listings/252490/Metal
http://steamcommunity.com/market/listings/252490/Volcanic%20Stone%20Hatchet
http://steamcommunity.com/market/listings/252490/Box
http://steamcommunity.com/market/listings/252490/High%20Quality%20Bag
http://steamcommunity.com/market/listings/252490/Utilizer%20Pants
http://steamcommunity.com/market/listings/252490/Lizard%20Skull
http://steamcommunity.com/market/listings/252490/Frost%20Wolf
http://steamcommunity.com/market/listings/252490/Cloth

Get body text from message URL in Swift

This question follows on from the question:
Drag messages from Mail onto Dock using Swift
I have now received a drag and drop message from dragging a message from Mail to the dock. The only thing that the I get is the message title and the message URL as follows:
message:%3C2004768713.4671#mail.stackoverflow.com%3E
How do I get the body text from this URL?
Thanks
Andrew
You can use a simple regular expression for this.
The .* parameter is hungry and will try to match as much as possible of the url subject:message pair before the (hopefully) last # character in the string.
I have also included a small tool function (rangeFromNSRange()) to help convert an Objective-C NSRange struct into a proper Range<String.Index> Swift struct.
Paste the following in a Swift playground to see it working (and have fun!):
import Cocoa
func rangeFromNSRange(range: NSRange, string: String) -> Range<String.Index>
{
let startIndex = string.startIndex.advancedBy(range.location)
let endIndex = startIndex.advancedBy(range.length)
return Range(start: startIndex, end: endIndex)
}
func parseEmailURL(urlAsString url: String) -> (subject: String, message: String)?
{
let regexFormat = "^(.*):(.*)#"
if let regex = try? NSRegularExpression(pattern: regexFormat, options: NSRegularExpressionOptions.CaseInsensitive)
{
let matches = regex.matchesInString(url, options: NSMatchingOptions(rawValue: 0), range: NSMakeRange(0, url.characters.count))
if let match = matches.first
{
let subjectRange = rangeFromNSRange(match.rangeAtIndex(1), string: url)
let messageRange = rangeFromNSRange(match.rangeAtIndex(2), string: url)
let subject = url.substringWithRange(subjectRange)
let message = url.substringWithRange(messageRange)
return (subject, message)
}
}
return nil
}
parseEmailURL(urlAsString: "message:2004768713.4671#mail.stackoverflow.com")