swift soup parsing with evaluateJavaScript in webview - swift

I want to get a video URL from HTML -:
<div class="_53mw" data-store="{"videoID":"607125377233758","playerFormat":"inline","playerOrigin":"permalink","external_log_id":null,"external_log_type":null,"rootID":607125377233758,"playerSuborigin":"misc","useOzLive":false,"playbackIsLiveStreaming":false,"canUseOffline":null,"playOnClick":true,"videoDebuggerEnabled":false,"videoViewabilityLoggingEnabled":false,"videoViewabilityLoggingPollingRate":-1,"videoScrollUseLowThrottleRate":true,"playInFullScreen":false,"type":"video","src":"*https:\/\/video.fdel10-1.fna.fbcdn.net\/v\/t42.1790-2\/271574467_153621553687266_1119427332980623121_n.mp4?_nc_cat=106&ccb=1-5&_nc_sid=985c63&efg=eyJ2ZW5jb2RlX3RhZyI6InN2ZV9zZCJ9&_nc_ohc=HuufVpgJRnEAX8_AEix&_nc_rml=0&_nc_ht=video.fdel10-1.fna&oh=00_AT8eUqTIMXRHidmafowZmL7-o4k2JG0FqA4QbFKNINiQ8Q&oe=61DD4DFB","width":414,"height":621*,"trackingNodes":"FH-R","downloadResources":null,"subtitlesSrc":null,"spherical":false,"sphericalParams":null,"defaultQuality":null,"availableQualities":null,"playStartSec":null,"playEndSec":null,"playMuted":null,"disableVideoControls":false,"loop":true,"numOfLoops":13,"shouldPlayInline":true,"dashManifest":null,"isAdsPreview":false,"iframeEmbedReferrer":null,"adClientToken":null,"audioOnlyVideoSrc":null,"audioOnlyEnabled":false,"permalinkShareID":null,"feedPosition":null,"chainDepth":null,"videoURL":"https:\/\/www.facebook.com\/100069898026392\/videos\/607125377233758\/","disableLogging":false}" data-sigil="inlineVideo">
i am doing this-:
do {
let doc: Document = try SwiftSoup.parseBodyFragment(html)
let headerTitle = try doc.title()
// my body
let body = doc.body()
// elements to remove, in this case images
let undesiredElements: Elements? = try body?.select("a")
//remove
try! undesiredElements?.remove()
// print("Header title: \(headerTitle)")
print("Header body: \(body)")
}
How to do this with swift soup

Try this code - :
do {
let doc: Document = try SwiftSoup.parse(html)
let size = try doc.getElementsByClass("_53mw").first()
let data = try size?.attr("data-store")
print(data!)
let videoUrl = convertToDictionary(text: data!)
let url = videoUrl!["src"] as! String
print(url)
} catch Exception.Error(let type, let message) {
print("Message: \(message)")
} catch {
print("error")
}

Related

Why is this Swift web scraper not working?

I am having trouble scraping an image HTML link with a code I found on youtube (https://www.youtube.com/watch?v=0jTyKu9DGm8&list=PLYjXqILgs9uPwYlmSrIkNj2O3dwPCcoBK&index=2). The code works perfectly fine in a playground, but there is something wrong with my implementation into an Xcode project. (More like: im not sure how to implement it into my project :) )
When I ran this code on a Playground it pulled the link that I needed exactly as I needed it to be outputted.
import Foundation
let url = URL(string: "https://guide.michelin.com/th/en/bangkok-
region/bangkok/restaurant/somtum-khun-kan")
let task = URLSession.shared.dataTask(with: url!) { (data, resp, error) in
guard let data = data else {
print("data was nil")
return
}
guard let htmlString = String(data: data, encoding: String.Encoding.utf8) else {
print("can not cast data into string")
return
}
let leftSideOfTheString = """
image":"
"""
let rightSideOfTheString = """
","#type
"""
guard let leftRange = htmlString.range(of: leftSideOfTheString) else {
print("can not find left range of string")
return
}
guard let rightRange = htmlString.range(of: rightSideOfTheString) else {
print("can not find right range of string")
return
}
let rangeOfValue = leftRange.upperBound..<rightRange.lowerBound
print(htmlString[rangeOfValue])
}
task.resume()
I then put the same exact code into a structure containing the code as a parameter and method, like so:
struct ImageLink {
let url = URL(string: "https://guide.michelin.com/th/en/bangkok-region/bangkok/restaurant/somtum-khun-kan")
func getImageLink() {
let task = URLSession.shared.dataTask(with: url!) { (data, resp, error) in
guard let data = data else {
print("data was nil")
return
}
guard let htmlString = String(data: data, encoding: String.Encoding.utf8) else {
print("can not cast data into string")
return
}
let leftSideOfTheString = """
image":"
"""
let rightSideOfTheString = """
","#type
"""
guard let leftRange = htmlString.range(of: leftSideOfTheString) else {
print("can not find left range of string")
return
}
guard let rightRange = htmlString.range(of: rightSideOfTheString) else {
print("can not find right range of string")
return
}
let rangeOfValue = leftRange.upperBound..<rightRange.lowerBound
print(htmlString[rangeOfValue])
}
task.resume()
}
}
Finally, to check if the code would give me the right link, I made an instance in a View and made a button printing the getImageLink() function like bellow. You'll see in commented out code that I tried displaying the image both by hard coding its link and by inserting the function call. The former worked as expected, the latter did not work.
import SwiftUI
struct WebPictures: View {
var imageLink = ImageLink()
var body: some View {
VStack {
//AsyncImage(url: URL(string: "\(imageLink.getImageLink())"))
//AsyncImage(url: URL(string: "https://axwwgrkdco.cloudimg.io/v7/__gmpics__/c8735576e7d24c09b45a4f5d56f739ba?width=1000"))
Button {
print(imageLink.getImageLink())
} label: {
Text("Print Html")
}
}
}
}
When I click the button to print the link I get the following message:
()
2022-05-16 17:21:30.030264+0800 MichelinRestaurants[35477:925525] [boringssl]
boringssl_metrics_log_metric_block_invoke(153) Failed to log metrics
https://axwwgrkdco.cloudimg.io/v7/__gmpics__/c8735576e7d24c09b45a4f5d56f739ba?width=1000
And if I click the button for a second time only this gets printed:
()
https://axwwgrkdco.cloudimg.io/v7/__gmpics__/c8735576e7d24c09b45a4f5d56f739ba?width=1000
If anybody knows how to help me out here that would be much appreciated!!
This fails because you do not wait until your func has pulled the link. You are in an async context here. One possible solution:
//Make a class in instead of a struct and inherit from ObservableObject
class ImageLink: ObservableObject {
let url = URL(string: "https://guide.michelin.com/th/en/bangkok-region/bangkok/restaurant/somtum-khun-kan")
//Create a published var for your view to get notified when the value changes
#Published var imageUrlString: String = ""
func getImageLink() {
let task = URLSession.shared.dataTask(with: url!) { (data, resp, error) in
guard let data = data else {
print("data was nil")
return
}
guard let htmlString = String(data: data, encoding: String.Encoding.utf8) else {
print("can not cast data into string")
return
}
let leftSideOfTheString = """
image":"
"""
let rightSideOfTheString = """
","#type
"""
guard let leftRange = htmlString.range(of: leftSideOfTheString) else {
print("can not find left range of string")
return
}
guard let rightRange = htmlString.range(of: rightSideOfTheString) else {
print("can not find right range of string")
return
}
let rangeOfValue = leftRange.upperBound..<rightRange.lowerBound
print(htmlString[rangeOfValue])
//Assign the scrapped link to the var
imageUrlString = htmlString[rangeOfValue]
}
task.resume()
}
}
And the view:
struct WebPictures: View {
//Observe changes from your imagelink class
#StateObject var imageLink = ImageLink()
var body: some View {
VStack {
AsyncImage(url: URL(string: imageLink.imageUrlString)) // assign imageurl to asyncimage
//AsyncImage(url: URL(string: "https://axwwgrkdco.cloudimg.io/v7/__gmpics__/c8735576e7d24c09b45a4f5d56f739ba?width=1000"))
Button {
imageLink.getImageLink()
} label: {
Text("Print Html")
}
}
}
}
Update:
In order to get the link when the view appears call it this way:
VStack {
AsyncImage(url: URL(string: imageLink.imageUrlString))
}
.onAppear{
if imageLink.imageUrlString.isEmpty{
imageLink.getImageLink()
}
}

Cannot convert value of type 'PlayParameters?' to expected argument type '[MPMusicPlayerPlayParameters]'

I am trying to create a music player with Swift and MusicKit, but I am getting this error when trying to load up the queue with the selected album from MusicKit. I don't get why the playparameters for the album and MPMusicPlayerPlayParametersQueueDescriptor are different.
let player = MPMusicPlayerController.applicationMusicPlayer
let queue = MPMusicPlayerPlayParametersQueueDescriptor(playParametersQueue: heavyRotation[album].playParameters)
player.prepareToPlay()
player.play()
Error on let queue line:
Cannot convert value of type 'PlayParameters?' to expected argument type '[MPMusicPlayerPlayParameters]'
NEW EDIT
var requestURLComponents = URLComponents()
requestURLComponents.scheme = "https"
requestURLComponents.host = "api.music.apple.com"
requestURLComponents.path = "/v1/me/history/heavy-rotation"
requestURLComponents.queryItems = [
URLQueryItem(name: "limit", value: "5")
]
guard let url = requestURLComponents.url else { return }
let request = MusicDataRequest(urlRequest: URLRequest(url: url))
do {
let response = try await request.response()
let decodedResponse = try JSONDecoder().decode(MusicItemCollection<Album>.self, from: response.data)
heavyRotation = decodedResponse
// print(response.debugDescription)
} catch {
print("error")
print(error)
print(error.localizedDescription)
}
...
Button(action: {
Task {
guard let album = heavyRotation[album] as Album? else { return }
print(album)
let player = ApplicationMusicPlayer.shared
player.queue = [album]
try await player.prepareToPlay()
try await player.play()
}
}) {
Image(systemName: "play.fill")
.frame(width: 50, height: 50, alignment: .center)
.fontSize(30)
.foregroundColor(.white)
}
While MusicKit's PlayParameters is a structure and an opaque object, MPMusicPlayerPlayParameters initializers expect a [String: Any] dictionary. You'll have to encode and decode the PlayParameters to MPMusicPlayerParameters.
Here's your example:
guard let parameters = heavyRotation[album].playParameters else { return }
let data = try JSONEncoder().encode(parameters)
let playParameters = try JSONDecoder().decode(MPMusicPlayerPlayParameters.self, from: data)
let queue = MPMusicPlayerPlayParametersQueueDescriptor(playParametersQueue: [playParameters])
let player = MPMusicPlayerController.applicationMusicPlayer
player.setQueue(with: queue)
player.prepareToPlay()
player.play()
If you're using MusicKit for Swift and its music player, you can directly set the album to the queue like this:
guard let album = heavyRotation[album] else { return }
let player = ApplicationMusicPlayer.shared
player.queue = [album]
try await player.prepareToPlay()
try await player.play()
Update - If you're trying to play an album from the library, then I had problems with that as well. As a workaround, you can get the album's local ID and then make another request to the catalog. If there's an album on Apple Music, then it should work.
Here's an example that works fine for me:
do {
/// First request to get the heavy rotation albums
guard let url = URL(string: "https://api.music.apple.com/v1/me/history/heavy-rotation") else { return }
let request = MusicDataRequest(urlRequest: URLRequest(url: url))
let response = try await request.response()
let heavyRotationAlbums = try JSONDecoder().decode(MusicItemCollection<Album>.self, from: response.data)
/// Get the first album
guard let album = heavyRotationAlbums.first else { return }
/// Get the local album ID
let albumID = album.id
/// Another request to get the album from Apple Music Catalog
guard let catalogURL = URL(string: "https://api.music.apple.com/v1/me/library/albums/\(albumID)/catalog") else { return }
let catalogRequest = MusicDataRequest(urlRequest: URLRequest(url: catalogURL))
let catalogResponse = try await catalogRequest.response()
let albums = try JSONDecoder().decode(MusicItemCollection<Album>.self, from: catalogResponse.data)
/// Get the same album, but with the catalog ID
guard let catalogAlbum = albums.first else { return }
/// Encode the parameters
let data = try JSONEncoder().encode(catalogAlbum.playParameters)
/// Decode the parameters to `MPMusicPlayerPlayParameters`
let playParameters = try JSONDecoder().decode(MPMusicPlayerPlayParameters.self, from: data)
// Create the queue
let queue = MPMusicPlayerPlayParametersQueueDescriptor(playParametersQueue: [playParameters])
let player = MPMusicPlayerController.applicationMusicPlayer
/// Set the queue
player.setQueue(with: queue)
try await player.prepareToPlay()
/// Finally, play the album!
player.play()
} catch {
print(error)
}

Swift UIImageView Firebase DispatchQueue

I am using firebase to save and load my images. I have created a new view in Xcode and am using the same code I have been using to load profile images. Yet, this is now throwing an error saying that the url string is nil. The image url data disappears after "DispatchQueue.global().async". What could be causing this and how could I track this? Very strange how this code works for other views yet for this new view it is throwing an error.
let businessProfilePicture = dictionary["profPicString"] as! String
if businessProfilePicture.count > 0 {
let url = URL(string: businessProfilePicture)
print(url)
print("printing the url here to check")
DispatchQueue.global().async {
let dataURL = try? Data(contentsOf: url!)
print(dataURL)
print("printing the data url here")
DispatchQueue.main.async {
print(dataURL)
print("Printing Data to check")
let image = UIImage(data: dataURL!)?.potter_circleo
self.businessProfilePicture.contentMode = UIView.ContentMode.scaleAspectFill
self.businessProfilePicture.image = image
}
}
Full Code
func getWorkLocation() {
let uid = Auth.auth().currentUser?.uid
var profPicURL: String = ""
Database.database().reference().child("employees").child(uid!).child("Business").observe(.value, with: { snapshot in
if snapshot.exists() {
let dictionary = snapshot.value as? NSDictionary
self.businessName.text = dictionary?["businessName"] as? String
self.businessStreet.text = dictionary?["businessStreet"] as? String
self.businessCity.text = dictionary?["businessCity"] as? String
profPicURL = dictionary?["profPicString"] as! String
// set image
if profPicURL.count > 0 {
let url = URL(string: profPicURL)
DispatchQueue.global().async {
let data = try? Data(contentsOf: url!)
DispatchQueue.main.async {
let image = UIImage(data: data!)?.potter_circle
self.businessProfilePicture.contentMode = UIView.ContentMode.scaleAspectFill
self.businessProfilePicture.image = image
}
}
} else {
let image = UIImage(named: "profile picture")?.potter_circle
self.businessProfilePicture.contentMode = UIView.ContentMode.scaleAspectFill
self.businessProfilePicture.image = image
}
} else {
self.businessName.text = ""
self.businessStreet.text = "Go to Add Work Location to send request"
self.businessCity.text = ""
self.deleteButton.isEnabled = false
}
})
}
Are you certain that the URL you create from profPicURL is being created properly?
URL(string:) can fail and return nil. If you then go on to implicitly unwrap it in Data(contentsOf: url!) you will crash.
Similarly, try? Data(contentsOf: url) can return nil. If it does, then when you implicitly unwrap it in UIImage(data: data!) you will crash.
As Jacob said in comments, you need to learn more about implicitly unwrapped optionals. To get you started, you might structure your code something like this:
if let url = URL(string: profPicURL) {
DispatchQueue.global().async {
if let data = try? Data(contentsOf: url),
let image = UIImage(data: data)?.potter_circle
{
DispatchQueue.main.async {
self.businessProfilePicture.contentMode = UIView.ContentMode.scaleAspectFill
self.businessProfilePicture.image = image
}
} else {
// raise an an error or set self.businessProfilePicture.image to a generic image or something
}
}
} else {
// raise an an error or set self.businessProfilePicture.image to a generic image or something
}

How to put image to NSCache in Swift?

I make some code using swift 4 to load image from URL, but every time I add images to server, it took a lot of time to load it in colection view or table view. I want to try store it in NScache but i dont understand to do it. can anyone help me, I'm new in swift :(
#objc func loadPosts() {
let url = URL(string: "http://someURL/Url.php")!
var request = URLRequest(url: url)
request.httpMethod = "POST"
let body = "phomepost=\(homepost)"
request.httpBody = body.data(using: String.Encoding.utf8)
URLSession.shared.dataTask(with: request) { data, response, error in
DispatchQueue.main.async(execute: {
if error == nil {
do{
let json = try JSONSerialization.jsonObject(with: data!, options: .mutableContainers) as? NSDictionary
self.comments.removeAll(keepingCapacity: false)
self.images.removeAll(keepingCapacity: false)
self.collectionView?.reloadData()
guard let parseJSON = json else {
print("Error While Parsing")
return
}
guard let posts = parseJSON["posts"] as? [AnyObject] else {
print("Error while parseJSONing")
return
}
self.comments = posts.reversed()
for i in 0 ..< self.comments.count {
let path = self.comments[i]["path"] as? String
if !path!.isEmpty {
let url = NSURL(string: path!)!
let imageData = try? Data(contentsOf: url as URL)
let image = UIImage(data: imageData! as Data)!
self.images.append(image)
} else {
let image = UIImage()
self.images.append(image)
}
}
self.collectionView?.reloadData()
//print(posts.count)
} catch {
print(error)
}
}else{
print(error)
}
})
}.resume()
}
You can use something like this:
private let cache = NSCache<NSString, NSData>()
.....
func downloadImage(url: String, handler: #escaping(Data?, Error?) -> Void){
let cacheID = NSString(string: url)
if let cachedData = cache.object(forKey: cacheID) {
handler((cachedData as Data), nil)
}else{
if let url = URL(string: url) {
let session = URLSession(configuration: urlSessionConfig)
var request = URLRequest(url: url)
request.cachePolicy = .returnCacheDataElseLoad
request.httpMethod = "get"
session.dataTask(with: request) { (data, response, error) in
if let _data = data {
self.cache.setObject(_data as NSData, forKey: cacheID)
handler(_data, nil)
}else{
handler(nil, error)
}
}.resume()
} else {
// NetworkError is a custom error
handler(nil, NetworkError.invalidURL)
}
}
}
}
This will add a small animation while loading using image set.
let imageCache = NSCache<AnyObject, AnyObject>()
extension UIImageView {
func loadImageFromUrl(urlString: String) {
let loader1 = UIImage(named: "loaderImage1.png")
let loader2 = UIImage(named: "loaderImage2.png")
let loader3 = UIImage(named: "loaderImage3.png")
let imageArray = [loader1, loader2, loader3]
let animatedImage = UIImage.animatedImage(with: imageArray as! [UIImage], duration: 1.7)
if let imageFromCache = imageCache.object(forKey: urlString as AnyObject) as? UIImage{
self.image = imageFromCache
return
} else {
self.image = animatedImage
Alamofire.request(urlString, method: .get).response { (responseData) in
if let data = responseData.data {
DispatchQueue.main.async {
if let imageToCache = UIImage(data: data){
imageCache.setObject(imageToCache, forKey: urlString as AnyObject)
self.image = imageToCache
}
}
}
} //alamofire
}
}
}

Parse HTML with Swiftsoup (Swift)?

I'm trying to parse some websites with Swiftsoup, let's say one of the websites is from Medium. How can I extract the body of the website and load the body to another UIViewController like what Instapaper does?
Here is the code I use to extract the title:
import SwiftSoup
class WebViewController: UIViewController, UIWebViewDelegate {
...
override func viewDidLoad() {
super.viewDidLoad()
let url = URL(string: "https://medium.com/#timjwise/stop-lying-to-yourself-when-you-snub-panhandlers-its-not-for-their-own-good-199d0aa7a513")
let request = URLRequest(url: url!)
webView.loadRequest(request)
guard let myURL = url else {
print("Error: \(String(describing: url)) doesn't seem to be a valid URL")
return
}
let html = try! String(contentsOf: myURL, encoding: .utf8)
do {
let doc: Document = try SwiftSoup.parseBodyFragment(html)
let headerTitle = try doc.title()
print("Header title: \(headerTitle)")
} catch Exception.Error(let type, let message) {
print("Message: \(message)")
} catch {
print("error")
}
}
}
But I got no luck to extract the body of the website or any other websites, any way to get it work? CSS or JavaScript (I know nothing about CSS or Javascript)?
Use function body https://github.com/scinfu/SwiftSoup#parsing-a-body-fragment
Try this:
let html = try! String(contentsOf: myURL, encoding: .utf8)
do {
let doc: Document = try SwiftSoup.parseBodyFragment(html)
let headerTitle = try doc.title()
// my body
let body = doc.body()
// elements to remove, in this case images
let undesiredElements: Elements? = try body?.select("img[src]")
//remove
undesiredElements?.remove()
print("Header title: \(headerTitle)")
} catch Exception.Error(let type, let message) {
print("Message: \(message)")
} catch {
print("error")
}