Merging and copying layers in QGIS

I'm trying to merge two layers and an already created shapefile (1) into a new shapefile (2).
I then have to copy this newly created shapefile (2) over shapefile (1), so that the new file is used for merging in the next iteration.
I run this in a loop so that at the end I get a shapefile with all the features merged. But shapefile (1) is not getting updated, so the merge result is also not correct.
Please find the code below and help me.
import qgis
from qgis.core import QgsVectorLayer, QgsProject, QgsGeometry, QgsVectorFileWriter
from PyQt5.QtCore import QVariant
import pandas as pd
import processing
import del_temp2 as d

# 'orb' (the list of orbit ids) and 'list' (a pandas DataFrame) are defined elsewhere.
layer2 = QgsVectorLayer('LineString', 'line', "memory")
QgsVectorFileWriter.writeAsVectorFormat(layer2, 'D:/Users/Lahari/ground_trace/orbit_orig.shp', "UTF-8", layer2.crs(), "ESRI Shapefile")

for i in orb:
    # Select the rows belonging to the current orbit and write them to a temporary CSV.
    list1 = list.loc[list['Orbit'] == i]
    list1.to_csv('D:/Users/Lahari/ground_trace/int/Test3.csv', index=None, header=True)

    # Load the CSV as a point layer and assign EPSG:4326.
    uri = 'file:///D:/Users/Lahari/ground_trace/int/Test3.csv?delimiter=,&yField=long&xField=Lat'
    layer1 = QgsVectorLayer(uri, 'orb', 'delimitedtext')
    crs = layer1.crs()
    crs.createFromId(4326)
    layer1.setCrs(crs)
    QgsVectorFileWriter.writeAsVectorFormat(layer1, 'D:/Users/Lahari/ground_trace/orbit_temp1.shp', "UTF-8", layer1.crs(), "ESRI Shapefile", layerOptions=['SHPT=POINT'])

    # Points -> path, then two offset copies of the path.
    processing.run("qgis:pointstopath", {'INPUT': 'D:/Users/Lahari/ground_trace/orbit_temp1.shp', 'ORDER_FIELD': 'Lat', 'OUTPUT': 'D:/Users/Lahari/ground_trace/orbit_temp2.shp'})
    processing.run("qgis:arrayoffsetlines", {'INPUT': 'D:/Users/Lahari/ground_trace/orbit_temp2.shp', 'OFFSET': -1, 'OUTPUT': 'D:/Users/Lahari/ground_trace/orbit_trans1.shp'})
    processing.run("qgis:arrayoffsetlines", {'INPUT': 'D:/Users/Lahari/ground_trace/orbit_temp2.shp', 'OFFSET': 1, 'OUTPUT': 'D:/Users/Lahari/ground_trace/orbit_trans2.shp'})

    # Merge the two offset layers with the accumulated shapefile (1) into shapefile (2).
    processing.run("qgis:mergevectorlayers", {
        'LAYERS': ['D:/Users/Lahari/ground_trace/orbit_trans1.shp',
                   'D:/Users/Lahari/ground_trace/orbit_trans2.shp',
                   'D:/Users/Lahari/ground_trace/orbit_orig.shp'],   # shapefile (1)
        'CRS': layer1.crs(),
        'OUTPUT': 'D:/Users/Lahari/ground_trace/orbit_actual.shp'    # shapefile (2)
    })

    # Copy shapefile (2) back over shapefile (1) for the next iteration.
    layer3 = QgsVectorLayer("D:/Users/Lahari/ground_trace/orbit_actual.shp", "Test", "ogr")
    QgsProject.instance().addMapLayer(layer3)
    QgsVectorFileWriter.writeAsVectorFormat(layer3, 'D:/Users/Lahari/ground_trace/orbit_orig.shp', "UTF-8", layer3.crs(), "ESRI Shapefile")
    QgsProject.instance().removeMapLayer(layer3.id())
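As a minimal sketch of an alternative approach (not the code above), assuming the per-orbit offset shapefiles are produced inside the loop exactly as shown, all features could instead be accumulated in a single memory layer and written out once after the loop, so orbit_orig.shp is never overwritten while it is still loaded. Note that attribute fields are not copied in this sketch:

from qgis.core import QgsVectorLayer, QgsVectorFileWriter

# Accumulate every orbit's offset lines in one in-memory layer.
accum = QgsVectorLayer('LineString?crs=EPSG:4326', 'accum', 'memory')

def append_features(shp_path, target):
    """Copy all features from a shapefile into the target memory layer."""
    src = QgsVectorLayer(shp_path, 'src', 'ogr')
    target.dataProvider().addFeatures(list(src.getFeatures()))
    target.updateExtents()

for i in orb:
    # ... build orbit_trans1.shp and orbit_trans2.shp for this orbit as above ...
    append_features('D:/Users/Lahari/ground_trace/orbit_trans1.shp', accum)
    append_features('D:/Users/Lahari/ground_trace/orbit_trans2.shp', accum)

# Write the merged result only once, after the loop.
QgsVectorFileWriter.writeAsVectorFormat(accum, 'D:/Users/Lahari/ground_trace/orbit_actual.shp',
                                        "UTF-8", accum.crs(), "ESRI Shapefile")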

Download historical prices for multiple stocks

Here's what I tried (from one suggestion in this group):
from pandas_datareader import data as dreader
symbols = ['GOOG', 'AAPL', 'MMM', 'ACN', 'A', 'ADP']
pnls = {i:dreader.DataReader(i,'yahoo','1985-01-01','2016-09-01') for i in symbols}
This is the error:
RemoteDataError: Unable to read URL: https://finance.yahoo.com/quote/GOOG/history?period1=473380200&period2=1472768999&interval=1d&frequency=1d&filter=history
Here's another piece of code that does not seem to work:
import time
import datetime
import pandas as pd

tickers = ['TSLA', 'TWTR', 'MSFT', 'GOOG', 'AAPL']
interval = '1d'
period1 = int(time.mktime(datetime.datetime(2021, 1, 1, 23, 59).timetuple()))
period2 = int(time.mktime(datetime.datetime(2022, 5, 31, 23, 59).timetuple()))

xlwriter = pd.ExcelWriter('historicalprices.xlsx', engine='openpyxl')

for ticker in tickers:
    # Build the download URL for this ticker (f-string, so the placeholders are filled in).
    query_string = f'https://query1.finance.yahoo.com/v7/finance/download/{ticker}?period1={period1}&period2={period2}&interval={interval}&events=history&includeAdjustedClose=true'
    df = pd.read_csv(query_string)
    df.to_excel(xlwriter, sheet_name=ticker, index=False)

xlwriter.save()
This is the error:
HTTPError: HTTP Error 500: Internal Server Error
This code works for individual stocks:
from yahoofinancials import YahooFinancials
import pandas as pd

yahoo_financials = YahooFinancials('AAPL')
data = yahoo_financials.get_historical_price_data(start_date='2021-12-01',
                                                  end_date='2022-05-31',
                                                  time_interval='daily')
aapl_df = pd.DataFrame(data['AAPL']['prices'])
aapl_df = aapl_df.drop('date', axis=1).set_index('formatted_date')
aapl_df.to_csv('/Users/rangapriyas/Desktop/Prices/AAPL.csv')
I am new to Python. Can anyone help me with a for loop for the above code, please?
Thanks in advance!
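A minimal sketch of such a loop, assuming the same YahooFinancials call as above and that the output directory already exists (the ticker list is only illustrative):

from yahoofinancials import YahooFinancials
import pandas as pd

tickers = ['AAPL', 'MSFT', 'GOOG']  # illustrative list

for ticker in tickers:
    # Same call as the single-stock example above, once per ticker.
    yahoo_financials = YahooFinancials(ticker)
    data = yahoo_financials.get_historical_price_data(start_date='2021-12-01',
                                                      end_date='2022-05-31',
                                                      time_interval='daily')
    df = pd.DataFrame(data[ticker]['prices'])
    df = df.drop('date', axis=1).set_index('formatted_date')
    # One CSV per ticker, in the same directory as before.
    df.to_csv('/Users/rangapriyas/Desktop/Prices/{}.csv'.format(ticker))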
For anyone who may benefit, this code works:
import yfinance as yf
import pandas as pd

tickers_list = ['AAPL', 'WMT', 'IBM', 'MU', 'BA', 'AXP']

# Fetch the data
data = yf.download(tickers_list, '2015-1-1', '2022-05-31')['Adj Close']

# Print first 5 rows of the data
print(data.head())
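If you also want one sheet per ticker, as in the earlier ExcelWriter attempt, the downloaded frame can be split by column. A small sketch reusing the names above (the output filename is only illustrative):

# Write one Excel sheet per ticker from the frame downloaded above.
with pd.ExcelWriter('historicalprices.xlsx', engine='openpyxl') as writer:
    for ticker in tickers_list:
        data[ticker].to_frame(name='Adj Close').to_excel(writer, sheet_name=ticker)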

Importing images from Github to Colab

I am having trouble importing my own images to https://colab.research.google.com/github/vijishmadhavan/Light-Up/blob/master/ArtLine.ipynb#scrollTo=eOhPqC6fysD4.
I am able to run it with the sample images (e.g., https://wallpapercave.com/wp/wp2504860.jpg), but when I copy the same image into my own GitHub repository (https://github.com/thiirane/Artline_images/blob/main/wp2504860.jpg), I get the error below.
Here is the code:
import requests
from io import BytesIO
import PIL
from google.colab import files
from PIL import Image
from IPython.display import Image

#url = 'https://wallpapercave.com/wp/wp2504860.jpg' #@param {type:"string"}
url = 'https://github.com/thiirane/Artline_images/blob/main/wp2504860.jpg' #@param {type:"string"}

#uploaded = files.upload()
response = requests.get(url)
img = PIL.Image.open(BytesIO(response.content)).convert("RGB")
# T and show_image come from earlier cells of the ArtLine notebook.
img_t = T.ToTensor()(img)
img_fast = Image(img_t)
show_image(img_fast, figsize=(8,8), interpolation='nearest');
Here is the error:
UnidentifiedImageError Traceback (most recent call last)
<ipython-input-18-5d0fa6dc025f> in <module>()
8 response = requests.get(url)
9
---> 10 img= PIL.Image.open(BytesIO(response.content)).convert("RGB")
11 img_t = T.ToTensor()(img)
12 img_fast = Image(img_t)
/usr/local/lib/python3.6/dist-packages/PIL/Image.py in open(fp, mode)
2860 warnings.warn(message)
2861 raise UnidentifiedImageError(
-> 2862 "cannot identify image file %r" % (filename if filename else fp)
2863 )
2864
UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7fb88126f0f8>
I would be grateful for your help. It is likely something I am not doing that would allow Colab to access my repository.
It's because the URL is not the direct download link. Use this instead.
import requests
from io import BytesIO
from PIL import Image
url = 'https://raw.githubusercontent.com/thiirane/Artline_images/main/wp2504860.jpg'
page = requests.get(url)
Image.open(BytesIO(page.content))
Or you could use git to download your repository containing images.
!git clone https://github.com/thiirane/Artline_images.git images
from PIL import Image
Image.open('images/wp2504860.jpg')
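The difference is only in the host and path: the github.com "blob" URL returns the HTML page for the file, while raw.githubusercontent.com serves the file itself. As a small illustrative sketch, the conversion can also be done programmatically:

# Convert a GitHub "blob" page URL into the corresponding raw file URL.
blob_url = 'https://github.com/thiirane/Artline_images/blob/main/wp2504860.jpg'
raw_url = blob_url.replace('github.com', 'raw.githubusercontent.com').replace('/blob/', '/')
print(raw_url)  # https://raw.githubusercontent.com/thiirane/Artline_images/main/wp2504860.jpg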

Webscraper not giving the right results with bs4

I'm trying to scrape the live billionaires net worth table here: https://www.bloomberg.com/billionaires/
This is my code so far. All I get is [] as a result in the Python shell.
Something has to be wrong with the "findAll"; I don't think I'm using the correct tags.
I also tried using just "find".
from urllib.request import urlopen as uReq
from bs4 import BeautifulSoup as soup
import csv
#Open page and grab html
my_url = ('https://www.bloomberg.com/billionaires/')
uClient = uReq(my_url)
page_html = uClient.read()
uClient.close()
#HTML Parser.
page_soup = soup(page_html, 'html.parser')
table = []
#Find table.
ele_table = page_soup.findAll('div',{'class':'dvz-content'})
print(ele_table)
I'm expecting for the table to be printed out so I can get it into a CSV file.
The data is dynamically loaded. You can pull it from a script tag, provided you supply the right headers. Regex out the required info, parse it with the json library, then hand it off to pandas to write the CSV.
from bs4 import BeautifulSoup as bs
import requests, re, json
import pandas as pd

headers = {
    'user-agent': 'Mozilla/5.0',
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
    'if-none-match': 'W/^\\^5dbb59e6-91b10^\\^',
    'if-modified-since': 'Thu, 31 Oct 2019 22:02:14 GMT'  # this may be a safeguard for caching. Consider adding it dynamically.
}

p = re.compile(r'window.top500 = (.*);')
r = requests.get('https://www.bloomberg.com/billionaires/', headers=headers)
data = json.loads(p.findall(r.text)[0])
df = pd.DataFrame(data)
df.to_csv(r'C:\Users\User\Desktop\Data.csv', sep=',', encoding='utf-8-sig', index=False)
Example output: (screenshot of the resulting dataframe omitted)

Using input function with remote files in snakemake

I want to use a function to read input file paths from a dataframe and send them to my Snakemake rule. I also have a helper function to select the remote from which to pull the files.
from snakemake.remote.GS import RemoteProvider as GSRemoteProvider
from snakemake.remote.SFTP import RemoteProvider as SFTPRemoteProvider
from os.path import join
import pandas as pd

configfile: "config.yaml"

units = pd.read_csv(config["units"]).set_index(["library", "unit"], drop=False)
TMP = join('data', 'tmp')

def access_remote(local_path):
    """Connects to the remote defined in the config file."""
    provider = config['provider']
    if provider == 'GS':
        GS = GSRemoteProvider()
        remote_path = GS.remote(join("gs://" + config['bucket'], local_path))
    elif provider == 'SFTP':
        SFTP = SFTPRemoteProvider(
            username=config['user'],
            private_key=config['ssh_key']
        )
        remote_path = SFTP.remote(
            config['host'] + ":22" + join(base_path, local_path)  # base_path is defined elsewhere
        )
    else:
        remote_path = local_path
    return remote_path

def get_fastqs(wc):
    """
    Get fastq files (units) of a particular library - sample
    combination from the unit sheet.
    """
    fqs = units.loc[
        (units.library == wc.library) &
        (units.libtype == wc.libtype),
        "fq1"
    ]
    return {
        "r1": list(map(access_remote, fqs.values)),
    }

# Combine all fastq files from the same sample / library type combination
rule combine_units:
    input: unpack(get_fastqs)
    output:
        r1 = join(TMP, "reads", "{library}_{libtype}.end1.fq.gz")
    threads: 12
    run:
        shell("cat {i1} > {o1}".format(i1=input['r1'], o1=output['r1']))
My config file contains the bucket name and provider, which are passed to the function. This works as expected when simply running snakemake.
However, I would like to use the Kubernetes integration, which requires passing the provider and bucket name on the command line. But when I run:
snakemake -n --kubernetes --default-remote-provider GS --default-remote-prefix bucket-name
I get this error:
ERROR :: MissingInputException in line 19 of Snakefile:
Missing input files for rule combine_units:
bucket-name/['bucket-name/lib1-unit1.end1.fastq.gz', 'bucket-name/lib1-unit2.end1.fastq.gz', 'bucket-name/lib1-unit3.end1.fastq.gz']
The bucket name is applied twice: once mapped correctly to each element, and once before the whole list (which gets converted to a string). Did I miss something? Is there a good way to work around this?
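One possible workaround, sketched below with a hypothetical config flag (use_default_remote), is to have the helper return plain relative paths whenever the default remote provider/prefix is supplied on the command line, so that only Snakemake's own prefixing is applied:

# Sketch only: 'use_default_remote' is a hypothetical flag, set when the workflow
# is launched with --default-remote-provider/--default-remote-prefix.
def access_remote(local_path):
    """Return a remote object, or the bare path if --default-remote-prefix
    is expected to do the prefixing."""
    if config.get('use_default_remote', False):
        return local_path
    provider = config['provider']
    if provider == 'GS':
        return GSRemoteProvider().remote(join("gs://" + config['bucket'], local_path))
    elif provider == 'SFTP':
        return SFTPRemoteProvider(username=config['user'],
                                  private_key=config['ssh_key']).remote(
            config['host'] + ":22" + join(base_path, local_path))  # base_path as defined elsewhere
    return local_path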

Write CSV to IBM Bluemix Object Storage from a DSX Python 2.7 notebook

I am trying to write a pandas dataframe as CSV to Bluemix Object Storage from a DSX Python notebook. I first save the dataframe to a 'local' CSV file. I then have a routine that attempts to write the file to Object Storage. I get a 413 response - object too large. The file is only about 3MB. Here's my code, based on a JSON example I found here: http://datascience.ibm.com/blog/working-with-object-storage-in-data-science-experience-python-edition/
import requests

def put_file(credentials, local_file_name):
    """This function writes file content to Object Storage V3."""
    url1 = ''.join(['https://identity.open.softlayer.com', '/v3/auth/tokens'])
    data = {'auth': {'identity': {'methods': ['password'],
                                  'password': {'user': {'name': credentials['name'],
                                                        'domain': {'id': credentials['domain']},
                                                        'password': credentials['password']}}}}}
    headers = {'Content-Type': 'text/csv'}
    with open(local_file_name, 'rb') as f:
        resp1 = requests.post(url=url1, data=f, headers=headers)
    return resp1
Any help or pointers is much appreciated.
This code snippet from the tutorial worked fine for me (for a 12 MB file).
from io import BytesIO
import requests
import json
import pandas as pd

def put_file(credentials, local_file_name):
    """This function writes the content of a local file to
    Bluemix Object Storage V3."""
    f = open(local_file_name, 'r')
    my_data = f.read()
    url1 = ''.join(['https://identity.open.softlayer.com', '/v3/auth/tokens'])
    data = {'auth': {'identity': {'methods': ['password'],
                                  'password': {'user': {'name': credentials['username'],
                                                        'domain': {'id': credentials['domain_id']},
                                                        'password': credentials['password']}}}}}
    headers1 = {'Content-Type': 'application/csv'}
    resp1 = requests.post(url=url1, data=json.dumps(data), headers=headers1)
    resp1_body = resp1.json()
    for e1 in resp1_body['token']['catalog']:
        if e1['type'] == 'object-store':
            for e2 in e1['endpoints']:
                if e2['interface'] == 'public' and e2['region'] == 'dallas':
                    url2 = ''.join([e2['url'], '/', credentials['container'], '/', local_file_name])
                    s_subject_token = resp1.headers['x-subject-token']
                    headers2 = {'X-Auth-Token': s_subject_token, 'accept': 'application/json'}
                    resp2 = requests.put(url=url2, headers=headers2, data=my_data)
                    print resp2
I created a random pandas dataframe using:
import numpy as np
df = pd.DataFrame(np.random.randint(0, 100, size=(1000000, 4)), columns=list('ABCD'))
saved it to CSV:
df.to_csv('myPandasData_1000000.csv', index=False)
and then put it to the object store:
put_file(credentials_1, 'myPandasData_1000000.csv')
You can get the credentials_1 object by clicking insert to code -> Insert credentials for any object in your object store.