SQLAlchemy Asyncio ORM Unable to Query Database When Retrieving Tables and Columns from MetaData - postgresql

Using SQLAlchemy async ORM 1.4, Postgres backend, Python 3.7
I am using an augmented Declarative Base with the SA ORM. The tables are not held in models.py but are committed directly to the database by parsing a JSON script that contains all the table schemas. Because of this, I can't import the models at the top of the script like from models import ThisTable.
So to work with CRUD operations on the tables, I first retrieve them by reflecting the metadata.
In the 'usual' way, when importing all the tables at the top of the script, a query like this works:
result = await s.execute(select(func.sum(TableName.column)))
curr = result.all()
When I try to reflect the table and column objects from the MetaData in order to query them, this doesn't work. There are lots of AttributeError: 'Table' object has no attribute 'func' or TypeError: 'Table' object is not callable errors.
def retrieve_table_obj(table):
    meta = MetaData()
    meta.reflect(bind=sync_engine)
    return meta.tables[table]

def retrieve_table_cols(self, table):
    table = retrieve_table_obj('users')
    return table.columns.keys()

async def reading(collection, modifications):
    table = db.retrieve_table_obj(collection)
    columns = db.retrieve_table_cols(collection)
    for c in columns:
        for f in modifications['fields']:
            if c in f:
                q = select(func.sum(table.c))
                result = await s.execute(q)
                curr = result.all()

asyncio.run(reading("users", {'fields': ["usage", "allowance"]}))
How can I query tables and columns in the database when they first have to be explicitly retrieved?

The automap extension can be used to automatically reflect database tables to SQLAlchemy models. However, automap uses inspect on the engine, and this isn't supported on async engines; we can work around it by doing the automapping with a synchronous engine. Once the models have been mapped, they can be used by the async engine.
For example:
import asyncio

import sqlalchemy as sa
from sqlalchemy import orm
from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession
from sqlalchemy.ext.automap import automap_base

sync_engine = sa.create_engine('postgresql:///test', echo=True, future=True)
Base = automap_base()
Base.prepare(sync_engine, reflect=True)

async def async_main(collection, modifications):
    engine = create_async_engine(
        "postgresql+asyncpg:///test",
        echo=True,
        future=True,
        connect_args={'ssl': False},
    )
    async_session = orm.sessionmaker(
        engine, class_=AsyncSession, future=True
    )

    async with async_session() as session:
        model = Base.classes[collection]
        matches = set(model.__mapper__.columns.keys()) & set(modifications['fields'])
        for m in matches:
            q = sa.select(sa.func.sum(getattr(model, m)))
            result = await session.execute(q)
            curr = result.all()
            for row in curr:
                print(row)
            print()

    # for AsyncEngine created in function scope, close and
    # clean-up pooled connections
    await engine.dispose()

asyncio.run(async_main("users", {'fields': ["usage", "allowance"]}))
If you don't need models, caching the MetaData object rather than recreating it on every call to retrieve_table_obj would make the existing code more efficient; you would also need to replace select(func.sum(table.c)) with select(sa.func.sum(getattr(table.c, c))) so that the aggregate is applied to a specific column rather than to the table's column collection.
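A minimal sketch of that table-only approach (assuming the sync_engine and async session setup shown above; the function and parameter names are illustrative):

from sqlalchemy import MetaData, select, func

# Reflect once at import time and reuse the cached MetaData afterwards.
meta = MetaData()
meta.reflect(bind=sync_engine)

def retrieve_table_obj(table_name):
    # Look the Table up in the cached MetaData instead of re-reflecting.
    return meta.tables[table_name]

async def reading(collection, modifications, session):
    table = retrieve_table_obj(collection)
    for c in table.columns.keys():
        if c in modifications['fields']:
            # Aggregate one concrete column, not the whole .c collection.
            q = select(func.sum(getattr(table.c, c)))
            result = await session.execute(q)
            print(result.all())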

Related

Retrieve data when clicking a button

Trying to receive input from WCT_Control into WCT_DataPull
Can't figure out how to get the data into WCT_DataPull to perform an action with it. I think I am going about this backwards, but I also think I have been staring at it too long.
Essentially, the user enters the information necessary into a GUI to connect to a specific SQL table (predetermined) and then saves the data in the table and outputs it as a csv file backup.
I want the user to click the submit button and have that create the backup. However, at this point when I click the button, it will store all the data in the variables (if I put a print statement in, I see the correct values), but I can't seem to figure out how to get the variables to WCT_DataPull, where the backup creation action is performed.
WCT_Control
from PyQt5.QtWidgets import *
from WCT_View import Ui_MainWindow

class Controller(QMainWindow, Ui_MainWindow):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.setupUi(self)
        self.run.clicked.connect(lambda: self.submit())

    def submit(self):
        self.run.clicked.connect()
        server = self.server_entry.text()
        database = self.data_entry.text()
        station = self.station_entry.text()
        app = self.app_entry.text()
        backup_name = self.filename_entry.text()
        self.server_entry.setText('')
        self.data_entry.setText('')
        self.station_entry.setText('')
        self.app_entry.setText('')
        self.filename_entry.setText('')
        return server, database, station, app, backup_name
WCT_DataPull
from WCT_Control import *
import pyodbc
import csv

pull_data = Controller()

def write_bak():
    driver = 'ODBC Driver 17 for SQL Server'
    serv, data, stat, app, bak_name = pull_data.submit()
    conn = pyodbc.connect('DRIVER={0};SERVER={1};DATABASE={2};Trusted_Connection=yes'.format(driver, serv, data))
    cursor = conn.cursor()
    rows = cursor.execute("""
        select DnsName, PackageName, Code, Value from WorkstationApplicationSettings
        where DnsName=? and PackageName=?
        """, stat, app).fetchall()
    for row in rows:
        print(row.PackageName, ':', row.Code, ':', row.Value)
    with open(bak_name, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerows(rows)
So you just have to do things the opposite way round: instead of using the PyQt5 controller inside the WCT_DataPull script, call the WCT_DataPull function from the PyQt5 script.
WCT_Control
from PyQt5.QtWidgets import *
from WCT_View import Ui_MainWindow
from WCT_DataPull import write_bak

class Controller(QMainWindow, Ui_MainWindow):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.setupUi(self)
        self.run.clicked.connect(lambda: self.submit())

    def submit(self):
        server = self.server_entry.text()
        database = self.data_entry.text()
        station = self.station_entry.text()
        app = self.app_entry.text()
        backup_name = self.filename_entry.text()
        self.server_entry.setText('')
        self.data_entry.setText('')
        self.station_entry.setText('')
        self.app_entry.setText('')
        self.filename_entry.setText('')
        write_bak(server, database, station, app, backup_name)
WCT_DataPull
import pyodbc
import csv

def write_bak(serv, data, stat, app, bak_name):
    driver = 'ODBC Driver 17 for SQL Server'
    conn = pyodbc.connect('DRIVER={0};SERVER={1};DATABASE={2};Trusted_Connection=yes'.format(driver, serv, data))
    cursor = conn.cursor()
    rows = cursor.execute("""
        select DnsName, PackageName, Code, Value from WorkstationApplicationSettings
        where DnsName=? and PackageName=?
        """, stat, app).fetchall()
    for row in rows:
        print(row.PackageName, ':', row.Code, ':', row.Value)
    with open(bak_name, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerows(rows)
Also make sure to write the code that runs WCT_Control through a QApplication event loop.
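A minimal sketch of that entry point (the module layout here is illustrative):

import sys
from PyQt5.QtWidgets import QApplication
from WCT_Control import Controller

if __name__ == '__main__':
    # Create the Qt application, show the main window and start the event loop.
    app = QApplication(sys.argv)
    window = Controller()
    window.show()
    sys.exit(app.exec_())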

Dash app connections to AWS postgres DB VERY SLOW

I've created a live-updating Dash app connected to a public-facing AWS Postgres database. I've put the DB connection inside my callback so it updates, but I find that it takes a very long time to retrieve the data and create the graph; if the interval is reduced to 10 seconds or less, no graph loads at all. I've tried to store the data in dcc.Store, but the initial load still takes a very long time. My abbreviated code is written below. I'm assuming the lag comes from the engine connecting to the database, because I am only reading a few rows and columns. Is there any way to speed this up?
import plotly.graph_objs as go
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output, State
from plotly.subplots import make_subplots
from sqlalchemy import create_engine, MetaData, Table
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import declarative_base
from sqlalchemy import Column, Integer, String, func, Date, ARRAY
from sqlalchemy.orm import sessionmaker

app = dash.Dash(__name__, external_stylesheets=[BS], suppress_callback_exceptions=True, update_title=None)
server = app.server

app.layout = html.Div([
    dcc.Store(id='time', storage_type='session'),
    dcc.Store(id='blood_pressure', storage_type='session'),
    html.Div(dcc.Graph(id='live-graph', animate=False), className='w-100'),
    html.Div(id="testing"),
    dcc.Interval(
        id='graph-update-BP',
        interval=30000,
        n_intervals=0
    )])

@app.callback(
    dash.dependencies.Output('live-graph', 'figure'),
    dash.dependencies.Output('blood_pressure', 'data'),
    dash.dependencies.Output('time', 'data'),
    [dash.dependencies.Input('graph-update-BP', 'n_intervals')],
    Input('live-graph', 'relayoutData'),
)
def update_graph_scatter_1(n):
    trace = []
    blood_pressure = []
    time = []

    engine = create_engine("postgresql://username:password@address:5432/xxxxx", echo=True, future=True)
    Session = sessionmaker(bind=engine)
    session = Session()
    Base = automap_base()
    Base.prepare(engine, reflect=True)
    User = Base.classes.users
    Datex = Base.classes.data

    for instance in session.query(Datex).filter(Datex.user_id == 3).filter(Datex.date_time == 'Monday,Apr:26'):
        blood_pressure.append([instance.systolic, instance.mean, instance.diastolic])
        time.append(instance.time)

    for i in range(0, len(blood_pressure)):
        trace.append(go.Box(y=blood_pressure[i],
                            x=time[i],
                            line=dict(color='#6a92ff'),
                            hoverinfo='all'))

    fig = make_subplots(rows=1, cols=1)

    def append_trace():
        for i in range(0, len(trace)):
            fig.append_trace(trace[i], 1, 1)

    append_trace()

    return fig, blood_pressure, time
You can increase performance in your app in the following ways:
Non-programming methods:
If your app is deployed on AWS, ensure your app is connecting to your database over private IP. This reduces the number of networks your data has to traverse and will result in significantly lower latency.
Ensure your virtual machine has enough RAM. (If you're loading 2GB of data to a machine with 1GB available RAM, you're going to see the IO hit disk before loading to your program.)
Programming methods:
Modularize connecting to your database, and only do it once. This removes the overhead of repeatedly reserving resources and authenticating against the database.
import os

class DbConnection:
    """Use this class to connect to your database within a dash app"""
    def __init__(self, **kwargs):
        self.DB_URI = os.environ.get('DB_URI', kwargs.get('DB_URI'))
        self.echo = kwargs.get('echo', True)
        self.future = kwargs.get('future', True)
        # Now create the engine
        self.engine = create_engine(self.DB_URI, echo=self.echo, future=self.future)
        # Make the session maker
        self.session_maker = sessionmaker(bind=self.engine)

    @property
    def session(self):
        """Return a session as a property"""
        return self.session_maker()

# -------------------------------------------
# In your app, instantiate the database connection
# and map your base
my_db_connection = DbConnection()  # provide kwargs as needed
session = my_db_connection.session  # necessary to assign property to a variable

# Map the classes
Base = automap_base()
Base.prepare(my_db_connection.engine, reflect=True)
User = Base.classes.users
Datex = Base.classes.data
Cache frequently queried data. Unless your data is massive and dramatically varying, you should expect better performance from loading the data from disk (or RAM) on your machine, than over the network from your database.
from functools import lru_cache

@lru_cache()
def get_blood_pressure(session, user_id, date):
    """returns blood pressure for a given user for a given date"""
    blood_pressure, time = [], []
    query = session.query(Datex)\
        .filter(Datex.user_id == user_id)\
        .filter(Datex.date_time == date)
    # I like short variable names when interacting with db results
    for rec in query:
        time.append(rec.time)
        blood_pressure.append([rec.systolic, rec.mean, rec.diastolic])
    # finally
    return blood_pressure, time
Putting it all together, your callback should be a lot quicker:
def update_graph_scatter_1(n):
    # I'm not sure how these variables will be assigned
    # but you'll figure it out
    blood_pressure, time = get_blood_pressure(session=session, user_id=user_id, date='Monday,Apr:26')

    # Create new traces
    trace = []
    for i in range(0, len(blood_pressure)):
        trace.append(go.Box(
            y=blood_pressure[i],
            x=time[i],
            line=dict(color='#6a92ff'),
            hoverinfo='all'
        ))

    # Add to subplots
    fig = make_subplots(rows=1, cols=1)
    for i in range(0, len(trace)):
        fig.append_trace(trace[i], 1, 1)

    return fig, blood_pressure, time
Lastly, it looks like you're recreating your graph objects on each update. This is a heavy operation. I'd recommend updating the graph's data instead. I know this is possible, since I've done it in the past, but it looks like the solution is not trivial, unfortunately. Perhaps an item for a later response or a follow-up question.
Further reading: https://dash.plotly.com/performance

H2 database content is not persisting on insert and update

I am using an H2 database to test my Postgres Slick functionality.
I created the H2DBComponent below:
trait H2DBComponent extends DbComponent {
  val driver = slick.jdbc.H2Profile

  import driver.api._

  val h2Url = "jdbc:h2:mem:test;MODE=PostgreSQL;DB_CLOSE_DELAY=-1;DATABASE_TO_UPPER=false;INIT=runscript from './test/resources/schema.sql'\\;runscript from './test/resources/schemadata.sql'"

  val logger = LoggerFactory.getLogger(this.getClass)

  val db: Database = {
    logger.info("Creating test connection ..................................")
    Database.forURL(url = h2Url, driver = "org.h2.Driver")
  }
}
In the above snippet I am creating my tables using schema.sql and inserting a single row (record) with schemadata.sql.
Then I am trying to insert a record into the table using my test case, as below:
class RequestRepoTest extends FunSuite with RequestRepo with H2DBComponent {
  test("Add new Request") {
    val response = insertRequest(Request("XYZ", "tk", "DM", "RUNNING", "0.1", "l1", "file1",
      Timestamp.valueOf("2016-06-22 19:10:25"), Some(Timestamp.valueOf("2016-06-22 19:10:25")), Some("scienceType")))
    val actualResult = Await.result(response, 10 seconds)
    assert(actualResult === 1)
    val response2 = getAllRequest()
    assert(Await.result(response2, 5 seconds).size === 2)
  }
}
The assert on the insert works fine, indicating that the record is inserted. But the getAllRequest() assert fails, as the output still contains only the single row inserted by schemadata.sql, which means the insertRequest change is not persisted. However, the statements below report that the record was inserted, since the insert returned 1 (one record inserted).
val response = insertRequest(Request("CMP_XYZ", "tesco_uk", "DM", "RUNNING", "0.1", "l1", "file1",
  Timestamp.valueOf("2016-06-22 19:10:25"), Some(Timestamp.valueOf("2016-06-22 19:10:25")),
  Some("scienceType")))
val actualResult = Await.result(response, 10 seconds)
Below is my definition of insertRequest:
def insertRequest(request: Request): Future[Int] = {
  db.run { requestTableQuery += request }
}
I am unable to figure out how I can see the inserted record. Is there any property/config I need to add?
But the getAllRequest() assert fails as the output still contains the single row (as inserted by schemadata.sql), which means the insertRequest change is not persisted
I would double-check that the assert(Await.result(response2, 5 seconds).size === 2) line is failing because of a size difference. Could it be failing for some other reason entirely?
For example, as INIT is run on each connection it could be that you are re-creating the database for each connection. Unless you're careful with the SQL, that could produce an error such as "table already exists". Adding TRACE_LEVEL_SYSTEM_OUT=2; to your H2 URL can be helpful in tracking what H2 is doing.
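For example, the same URL as above with only the trace setting added (the INIT scripts are elided here):

val h2Url = "jdbc:h2:mem:test;MODE=PostgreSQL;DB_CLOSE_DELAY=-1;TRACE_LEVEL_SYSTEM_OUT=2;DATABASE_TO_UPPER=false;INIT=..."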
A couple of suggestions.
First, you could ensure your SQL only runs as needed. For example, your schema.sql could add checks to avoid trying to create the table twice:
CREATE TABLE IF NOT EXISTS my_table( my_column VARCHAR NULL );
And likewise for your schemadata.sql:
MERGE INTO my_table KEY(my_column) VALUES ('a') ;
Alternatively, you could establish the schema and test data around your tests (e.g., in Scala code, using Slick, instead of via the INIT scripts). Your test framework probably has a way to ensure something is run before and after a test or test suite.
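A rough sketch of that idea using ScalaTest's BeforeAndAfterAll (requestTableQuery and the timeout values are assumptions based on your repo code):

import scala.concurrent.Await
import scala.concurrent.duration._
import org.scalatest.{BeforeAndAfterAll, FunSuite}

class RequestRepoTest extends FunSuite with RequestRepo with H2DBComponent with BeforeAndAfterAll {
  import driver.api._

  override def beforeAll(): Unit =
    // Create the schema once before any test runs; seed rows could be inserted here too.
    Await.result(db.run(requestTableQuery.schema.create), 10.seconds)

  override def afterAll(): Unit =
    // Drop everything afterwards so each run starts from a clean database.
    Await.result(db.run(requestTableQuery.schema.drop), 10.seconds)
}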

SQLAlchemy Core CREATE TEMPORARY TABLE AS

I'm trying to use the PostgreSQL CREATE TEMPORARY TABLE foo AS SELECT... query in SQLAlchemy Core. I've looked through the docs but don't see a way to do this.
I have a SQLA statement object. How do I create a temporary table from its results?
This is what I came up with. Please tell me if this is the wrong way to do it.
from sqlalchemy.sql import Select
from sqlalchemy.ext.compiler import compiles

class CreateTableAs(Select):
    """Create a CREATE TABLE AS SELECT ... statement."""

    def __init__(self, columns, new_table_name, is_temporary=False,
                 on_commit_delete_rows=False, on_commit_drop=False, *arg, **kw):
        """By default the table sticks around after the transaction. You can
        change this behavior using the `on_commit_delete_rows` or
        `on_commit_drop` arguments.

        :param on_commit_delete_rows: All rows in the temporary table will be
            deleted at the end of each transaction block.
        :param on_commit_drop: The temporary table will be dropped at the end
            of the transaction block.
        """
        super(CreateTableAs, self).__init__(columns, *arg, **kw)
        self.is_temporary = is_temporary
        self.new_table_name = new_table_name
        self.on_commit_delete_rows = on_commit_delete_rows
        self.on_commit_drop = on_commit_drop

@compiles(CreateTableAs)
def s_create_table_as(element, compiler, **kw):
    """Compile the statement."""
    text = compiler.visit_select(element)
    spec = ['CREATE', 'TABLE', element.new_table_name, 'AS SELECT']
    if element.is_temporary:
        spec.insert(1, 'TEMPORARY')
    on_commit = None
    if element.on_commit_delete_rows:
        on_commit = 'ON COMMIT DELETE ROWS'
    elif element.on_commit_drop:
        on_commit = 'ON COMMIT DROP'
    if on_commit:
        spec.insert(len(spec) - 1, on_commit)
    text = text.replace('SELECT', ' '.join(spec))
    return text
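For reference, a hypothetical usage sketch of the class above (the engine and the users table are assumptions, not part of the original question):

import sqlalchemy as sa

engine = sa.create_engine('postgresql:///test')                  # assumed engine
users = sa.Table('users', sa.MetaData(), autoload_with=engine)   # assumed table

# Renders roughly: CREATE TEMPORARY TABLE tmp_usage ON COMMIT DROP AS SELECT id, usage FROM users
stmt = CreateTableAs([users.c.id, users.c.usage], 'tmp_usage',
                     is_temporary=True, on_commit_drop=True)

with engine.begin() as conn:
    conn.execute(stmt)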

How to save design document / view in CouchDB database from Scala program?

I have written a Scala program that creates a new database and adds documents/views to it.
object CouchDBTest extends App {
  val dbSession = new Session("localhost", 5984)
  val db = dbSession.createDatabase("couchschooltest")

  val newC1 = new Document
  newC1.put("Type", "Class")
  newC1.put("ClassId", "C1")
  newC1.put("ClassName", "C-2A")
  newC1.put("ClassTeacher", "T1")
  newC1.accumulate("Students", "S1")
  newC1.accumulate("Students", "S2")
  newC1.accumulate("Students", "S3")
  db.saveDocument(newC1)

  val viewDocClass = new Document
  viewDocClass.addView("Class", "function(doc) {if(doc.Type == 'Class') { emit([doc.ClassId, doc.ClassName, doc.ClassTeacher, doc.Students], doc);}}")
  db.saveDocument(viewDocClass)
}
When I run this code, it creates the new database in CouchDB and adds the class document to it. However, it doesn't add the views to the database. It gives a runtime error while adding viewDocClass:
Error adding document - null null
For this I used the couchdb4j API.