I'm using the latest sqlalchemy and the latest pymssql from pip to connect to MSSQL Server 8.00.2039 (2005?). The difficulty is that the table and column names are in Russian. Is it possible to handle this database with sqlalchemy? At the very least I have to make 'select ... where' queries.
engine = create_engine("mssql+pymssql://%s:%s#RTBD/rt?charset=utf8" % (settings.RT_USER, settings.RT_PWD), echo = True, encoding = 'utf8')
metadata = MetaData()
metadata.reflect(engine, only = [u"Заказы",])
orders = metadata.tables[u'Заказы']
res = engine.execute(orders.select(orders.c[u'Номер заказа'] == u'14-01-0001'))
Exception is
ValueError Traceback (most recent call last)
<ipython-input-8-50ce93243d1c> in <module>()
----> 1 engine.execute(orders.select(orders.c[orders.columns.keys()[0]] == u'14-01-0001'))
python2.7/site-packages/sqlalchemy/engine/base.pyc in execute(self, statement, *multiparams, **params)
1680
1681 connection = self.contextual_connect(close_with_result=True)
-> 1682 return connection.execute(statement, *multiparams, **params)
1683
1684 def scalar(self, statement, *multiparams, **params):
python2.7/site-packages/sqlalchemy/engine/base.pyc in execute(self, object, *multiparams, **params)
718 type(object))
719 else:
--> 720 return meth(self, multiparams, params)
721
722 def _execute_function(self, func, multiparams, params):
python2.7/site-packages/sqlalchemy/sql/elements.pyc in _execute_on_connection(self, connection, multiparams, params)
315
316 def _execute_on_connection(self, connection, multiparams, params):
--> 317 return connection._execute_clauseelement(self, multiparams, params)
318
319 def unique_params(self, *optionaldict, **kwargs):
python2.7/site-packages/sqlalchemy/engine/base.pyc in _execute_clauseelement(self, elem, multiparams, params)
815 compiled_sql,
816 distilled_params,
--> 817 compiled_sql, distilled_params
818 )
819 if self._has_events or self.engine._has_events:
python2.7/site-packages/sqlalchemy/engine/base.pyc in _execute_context(self, dialect, constructor, statement, parameters, *args)
945 parameters,
946 cursor,
--> 947 context)
948
949 if self._has_events or self.engine._has_events:
python2.7/site-packages/sqlalchemy/engine/base.pyc in _handle_dbapi_exception(self, e, statement, parameters, cursor, context)
1109 )
1110
-> 1111 util.reraise(*exc_info)
1112
1113 finally:
python2.7/site-packages/sqlalchemy/engine/base.pyc in _execute_context(self, dialect, constructor, statement, parameters, *args)
938 statement,
939 parameters,
--> 940 context)
941 except Exception as e:
942 self._handle_dbapi_exception(
python2.7/site-packages/sqlalchemy/engine/default.pyc in do_execute(self, cursor, statement, parameters, context)
433
434 def do_execute(self, cursor, statement, parameters, context=None):
--> 435 cursor.execute(statement, parameters)
436
437 def do_execute_no_params(self, cursor, statement, context=None):
python2.7/site-packages/pymssql.so in pymssql.Cursor.execute (pymssql.c:6057)()
python2.7/site-packages/_mssql.so in _mssql.MSSQLConnection.execute_query (_mssql.c:9858)()
python2.7/site-packages/_mssql.so in _mssql.MSSQLConnection.execute_query (_mssql.c:9734)()
python2.7/site-packages/_mssql.so in _mssql.MSSQLConnection.format_and_run_query (_mssql.c:10814)()
python2.7/site-packages/_mssql.so in _mssql.MSSQLConnection.format_sql_command (_mssql.c:11042)()
python2.7/site-packages/_mssql.so in _mssql._substitute_params (_mssql.c:18359)()
<type 'str'>: (<type 'exceptions.UnicodeEncodeError'>, UnicodeEncodeError('ascii', u'params dictionary did not contain value for placeholder: \u041d\u043e\u043c\u0435\u0440 \u0437\u0430\u043a\u0430\u0437\u0430_1', 57, 62, 'ordinal not in range(128)'))
The query is right and ends with WHERE [Заказы].[Номер заказа] = %(Номер заказа_1)s
But info message from sqla is INFO sqlalchemy.engine.base.Engine {'\xd0\x9d\xd0\xbe\xd0\xbc\xd0\xb5\xd1\x80 \xd0\xb7\xd0\xb0\xd0\xba\xd0\xb0\xd0\xb7\xd0\xb0_1': '14-01-0001'}
The strings \xd0\x9d\xd0\xbe\xd0\xbc\xd0\xb5\xd1\x80 \xd0\xb7\xd0\xb0\xd0\xba\xd0\xb0\xd0\xb7\xd0\xb0_1 and \u041d\u043e\u043c\u0435\u0440 \u0437\u0430\u043a\u0430\u0437\u0430_1 are equal to Номер заказа_1
As stated on the mailing list, FreeTDS and related tools are very picky about this. The following test works for me, but it did not work for the poster above:
UnixODBC 2.3.0
FreeTDS 0.91
Pyodbc 3.0.7
Linux, not OSX, OSX has tons of problems with tds / pyodbc, I’m running on a Fedora 14 machine here
Freetds setting:
[sqlserver_2008_vmware]
host = 172.16.248.142
port = 1213
tds version = 7.2
client charset = UTF8
text size = 50000000
Test script:
# coding: utf-8
from sqlalchemy import create_engine, MetaData, Table, Column, String
e = create_engine("mssql+pyodbc://scott:tiger#ms_2008", echo=True)
#e = create_engine("mssql+pymssql://scott:tiger#172.16.248.142:1213", echo=True)
m = MetaData()
t = Table(u'Заказы', m, Column(u'Номер заказа', String(50)))
m.drop_all(e)
m.create_all(e)
orders = m.tables[u'Заказы']
e.execute(orders.select(orders.c[u'Номер заказа'] == u'14-01-0001'))
part of the output:
CREATE TABLE [Заказы] (
[Номер заказа] VARCHAR(50) NULL
)
2014-03-31 20:57:16,266 INFO sqlalchemy.engine.base.Engine ()
2014-03-31 20:57:16,268 INFO sqlalchemy.engine.base.Engine COMMIT
2014-03-31 20:57:16,270 INFO sqlalchemy.engine.base.Engine SELECT [Заказы].[Номер заказа]
FROM [Заказы]
WHERE [Заказы].[Номер заказа] = ?
2014-03-31 20:57:16,270 INFO sqlalchemy.engine.base.Engine (u'14-01-0001',)
Related
I have some code to query a db2 database that works if I don't include "for fetch only," but returns an error if I do. I was wondering if it's already being done, or how I could set it.
connection_url = f"jdbc:db2://{host}:{port}/{database}:user={username};password={password};"
df = (spark
.read
.format("jdbc")
.option("driver", "com.ibm.db2.jcc.DB2Driver")
.option("url",connection_url)
.option("query",query)
.load())
return(df)
Error when I include for fetch only:
com.ibm.db2.jcc.am.SqlSyntaxErrorException: DB2 SQL Error: SQLCODE=-104, SQLSTATE=42601, SQLERRMC=for;
and the detailed traceback is:
/databricks/spark/python/pyspark/sql/readwriter.py in load(self, path, format, schema, **options)
162 return self._df(self._jreader.load(self._spark._sc._jvm.PythonUtils.toSeq(path)))
163 else:
--> 164 return self._df(self._jreader.load())
165
166 def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None,
/databricks/spark/python/lib/py4j-0.10.9.1-src.zip/py4j/java_gateway.py in __call__(self, *args)
1302
1303 answer = self.gateway_client.send_command(command)
-> 1304 return_value = get_return_value(
1305 answer, self.gateway_client, self.target_id, self.name)
1306
/databricks/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
115 def deco(*a, **kw):
116 try:
--> 117 return f(*a, **kw)
118 except py4j.protocol.Py4JJavaError as e:
119 converted = convert_exception(e.java_exception)
/databricks/spark/python/lib/py4j-0.10.9.1-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
324 value = OUTPUT_CONVERTER[type](answer[2:], gateway_client)
325 if answer[1] == REFERENCE_TYPE:
--> 326 raise Py4JJavaError(
327 "An error occurred while calling {0}{1}{2}.\n".
328 format(target_id, ".", name), value)
Py4JJavaError: An error occurred while calling o4192.load.
: com.ibm.db2.jcc.am.SqlSyntaxErrorException: DB2 SQL Error: SQLCODE=-104, SQLSTATE=42601, SQLERRMC=for;
;), DRIVER=4.25.13
at com.ibm.db2.jcc.am.b6.a(b6.java:810)
at com.ibm.db2.jcc.am.b6.a(b6.java:66)
at com.ibm.db2.jcc.am.b6.a(b6.java:140)
at com.ibm.db2.jcc.am.k3.c(k3.java:2824)
at com.ibm.db2.jcc.am.k3.d(k3.java:2808)
at com.ibm.db2.jcc.am.k3.a(k3.java:2234)
at com.ibm.db2.jcc.am.k4.a(k4.java:8242)
at com.ibm.db2.jcc.t4.ab.i(ab.java:206)
at com.ibm.db2.jcc.t4.ab.b(ab.java:96)
at com.ibm.db2.jcc.t4.p.a(p.java:32)
at com.ibm.db2.jcc.t4.av.i(av.java:150)
at com.ibm.db2.jcc.am.k3.al(k3.java:2203)
at com.ibm.db2.jcc.am.k4.bq(k4.java:3730)
at com.ibm.db2.jcc.am.k4.a(k4.java:4609)
at com.ibm.db2.jcc.am.k4.b(k4.java:4182)
at com.ibm.db2.jcc.am.k4.bd(k4.java:780)
at com.ibm.db2.jcc.am.k4.executeQuery(k4.java:745)
at org.apache.spark.sql.execution.datasources.jdbc.JDBCRDD$.getQueryOutputSchema(JDBCRDD.scala:68)
at org.apache.spark.sql.execution.datasources.jdbc.JDBCRDD$.resolveTable(JDBCRDD.scala:58)
at org.apache.spark.sql.execution.datasources.jdbc.JDBCRelation$.getSchema(JDBCRelation.scala:241)
at org.apache.spark.sql.execution.datasources.jdbc.JdbcRelationProvider.createRelation(JdbcRelationProvider.scala:36)
at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:385)
at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:356)
at org.apache.spark.sql.DataFrameReader.$anonfun$load$2(DataFrameReader.scala:323)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:323)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:222)
at sun.reflect.GeneratedMethodAccessor704.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:380)
at py4j.Gateway.invoke(Gateway.java:295)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:251)
at java.lang.Thread.run(Thread.java:750)
I've searched ibm's documentation, and stack overflow using every possible permutation I can think of.
I've read the documentation about setting the isolation level, since I also get a failure when running queries that end in WITH UR, and I was thinking that if I could find out why that fails, I'd understand why FOR FETCH ONLY fails (there's an answer here). But it makes things clear as mud, because I couldn't use it to find an analogous solution for FOR FETCH ONLY.
I've looked at the db2 documentation on ibm's website, and searched stack overflow but this is eluding me.
edit: queries that run and don't run
Runs in dbvisualizer and pyspark
select
id_number
from
myschema.mytable
FETCH FIRST
10 ROWS ONLY
another one
select
id_number
from
myschema.mytable
Runs in dbvisualizer but not in pyspark
select
id_number
from
myschema.mytable
FETCH FIRST
10 ROWS ONLY FOR FETCH ONLY
another one
select
id_number
from
myschema.mytable
FOR FETCH ONLY
edit 2:
an example is that I run this code:
connection_url = f"jdbc:db2://{host}:{port}/{database}:user={username};password={password};"
df = (spark
.read
.format("jdbc")
.option("driver", "com.ibm.db2.jcc.DB2Driver")
.option("url",connection_url)
.option("query","""
select
id_number
from
myschema.mytable
FOR FETCH ONLY
""")
.load())
return(df)
and it doesn't work. and then I run this code:
connection_url = f"jdbc:db2://{host}:{port}/{database}:user={username};password={password};"
df = (spark
.read
.format("jdbc")
.option("driver", "com.ibm.db2.jcc.DB2Driver")
.option("url",connection_url)
.option("query","""
select
id_number
from
myschema.mytable
-- FOR FETCH ONLY
""")
.load())
return(df)
and it does work. and then I went into dbvisualizer, and verified that both versions of the query do work, so it's not a syntax error from what I can tell.
dbvisualizer says the database major version is 12 and minor is 1 and I believe it's z/os. I'm using the jdbc driver version 4.25.13 in both pyspark and dbvisualizer downloaded from maven here
edit 3:
this query runs fine in db visualizer, but fails in pyspark.
select
id_number
from
myschema.mytable
FOR READ ONLY
Alright. I found out what's happening. tl;dr: spark already does it.
The documentation here states:
A query that will be used to read data into Spark. The specified query will be parenthesized and used as a subquery in the FROM clause. Spark will also assign an alias to the subquery clause. As an example, spark will issue a query of the following form to the JDBC Source.
SELECT <columns> FROM (<user_specified_query>) spark_gen_alias
I'm fairly certain the relevant code is here:
val sqlText = options.prepareQuery +
s"SELECT $columnList FROM ${options.tableOrQuery} $myTableSampleClause" +
s" $myWhereClause $getGroupByClause $getOrderByClause $myLimitClause $myOffsetClause"
So FOR FETCH ONLY falls within the subquery, which is not allowed in DB2.
Fortunately though, it looks like CONCUR_READ_ONLY jdbc option is set, which is equivalent to FOR READ ONLY per documentation here
JDBC setting
Db2® cursor setting
IBM Informix® cursor setting
CONCUR_READ_ONLY
FOR READ ONLY
FOR READ ONLY
CONCUR_UPDATABLE
FOR UPDATE
FOR UPDATE
HOLD_CURSORS_OVER_COMMIT
WITH HOLD
WITH HOLD
TYPE_FORWARD_ONLY
SCROLL not specified
SCROLL not specified
TYPE_SCROLL_INSENSITIVE
INSENSITIVE SCROLL
SCROLL
TYPE_SCROLL_SENSITIVE
SENSITIVE STATIC, SENSITIVE DYNAMIC, or ASENSITIVE, depending on the cursorSensitivity Connection and DataSource property
Not supported
The relevant code in spark is:
stmt = conn.prepareStatement(sqlText,
ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY)
from here
As a side note, it looks like even if it wasn't specified explicitly in the code above, CONCUR_READ_ONLY is the default flag for ResultSet in java sql:
Concurrency
Description
ResultSet.CONCUR_READ_ONLY
Creates a read-only result set. This is the default
ResultSet.CONCUR_UPDATABLE
Creates an updateable result set.
source
According to the doc at [url:https://pola-rs.github.io/polars-book/user-guide/howcani/io/read_db.html]
import polars as pl
conn = "postgres://username:password#server:port/database"
query = "SELECT * FROM foo"
pl.read_sql(query, conn)
---------------------------------------------------------------------------
PanicException Traceback (most recent call last)
Input In [120], in <module>
1 import connectorx
----> 2 df = pl.read_sql(query, conn)
File e:\bokeh\venv\lib\site-packages\polars\io.py:969, in read_sql(sql, connection_uri, partition_on, partition_range, partition_num, protocol)
903 """
904 Read a SQL query into a DataFrame.
905 Make sure to install connectorx>=0.2
(...)
966
967 """
968 if _WITH_CX:
--> 969 tbl = cx.read_sql(
970 conn=connection_uri,
971 query=sql,
972 return_type="arrow",
973 partition_on=partition_on,
974 partition_range=partition_range,
975 partition_num=partition_num,
976 protocol=protocol,
977 )
978 return from_arrow(tbl) # type: ignore[return-value]
979 else:
File e:\bokeh\venv\lib\site-packages\connectorx\__init__.py:151, in read_sql(conn, query, return_type, protocol, partition_on, partition_range, partition_num, index_col)
148 except ModuleNotFoundError:
149 raise ValueError("You need to install pyarrow first")
--> 151 result = _read_sql(
152 conn,
153 "arrow" if return_type in {"arrow", "polars"} else "arrow2",
154 queries=queries,
155 protocol=protocol,
156 partition_query=partition_query,
157 )
158 df = reconstruct_arrow(result)
159 if return_type == "polars":
**PanicException: called `Result::unwrap()` on an `Err` value: Error { kind: ConfigParse, cause: Some("unexpected EOF") }**
Postgresql is 14.2, OS is Windows 10.
The PostgreSQL server is running fine; I can run psql database username in cmd.
Following the doc, I installed pyarrow and connectorx with pip.
I am trying to run the command below with Spark SQL in my PySpark notebook (Databricks) and it raises an error, but the same command works in a SQL notebook.
ALTER TABLE sales.product OWNER TO `john001#mycomp.com`;
Pyspark Code below
source_sql = "ALTER TABLE sales.product OWNER TO `john001#mycomp.com`;"
spark.Sql(source_sql)
Running the above statement with spark.sql throws the error shown below:
----> 7 spark.sql(source_sql)
/databricks/spark/python/pyspark/sql/session.py in sql(self, sqlQuery)
707 [Row(f1=1, f2=u'row1'), Row(f1=2, f2=u'row2'), Row(f1=3, f2=u'row3')]
708 """
--> 709 return DataFrame(self._jsparkSession.sql(sqlQuery), self._wrapped)
710
711 #since(2.0)
/databricks/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py in __call__(self, *args)
1303 answer = self.gateway_client.send_command(command)
1304 return_value = get_return_value(
-> 1305 answer, self.gateway_client, self.target_id, self.name)
1306
1307 for temp_arg in temp_args:
But if I run the same statement in a %sql cell, it works.
Can someone suggest how to run the same statement via spark.sql("ALTER TABLE sales.product OWNER TO john001#mycomp.com;")?
Spark SQL's ALTER TABLE command does not have the OWNER TO option. This is what's being executed in your pyspark code, and why it fails.
Databricks' ALTER TABLE command does have this option; it is a different SQL dialect. This is what's being executed in your sql notebook, and why it succeeds.
I am on datajoint python 0.13.1.
When executing .alter() on a table in my schema I am getting the following error message:
---------------------------------------------------------------------------
NotImplementedError Traceback (most recent call last)
<ipython-input-43-f79406c4b690> in <module>
----> 1 MyTable.alter()
/opt/miniconda3/envs/analysis/lib/python3.6/site-packages/datajoint/table.py in alter(self, prompt, context)
102 del frame
103 old_definition = self.describe(context=context, printout=False)
--> 104 sql, external_stores = alter(self.definition, old_definition, context)
105 if not sql:
106 if prompt:
/opt/miniconda3/envs/analysis/lib/python3.6/site-packages/datajoint/user_tables.py in definition(self)
75 """
76 raise NotImplementedError(
---> 77 'Subclasses of Table must implement the property "definition"')
78
79 #ClassProperty
NotImplementedError: Subclasses of Table must implement the property "definition"
What am I doing wrong?
Makes sense. Currently, alter can only change secondary attributes. It cannot yet modify foreign keys, the primary key, or indexes. Issue #901 explains this in part: https://github.com/datajoint/datajoint-python/issues/901
The workaround currently is to use SQL ALTER command, which you can issue using dj.conn().query(....). If you show your before and after table definitions, I will be able to generate the full ALTER command.
One of the problems was that I didn't load the schema code directly. However, then the error message changes to:
---------------------------------------------------------------------------
NotImplementedError Traceback (most recent call last)
<ipython-input-6-f79406c4b690> in <module>
----> 1 MyTable.alter()
/opt/miniconda3/envs/analysis/lib/python3.6/site-packages/datajoint/table.py in alter(self, prompt, context)
102 del frame
103 old_definition = self.describe(context=context, printout=False)
--> 104 sql, external_stores = alter(self.definition, old_definition, context)
105 if not sql:
106 if prompt:
/opt/miniconda3/envs/analysis/lib/python3.6/site-packages/datajoint/declare.py in alter(definition, old_definition, context)
370 raise NotImplementedError('table.alter cannot alter foreign keys (yet).')
371 if index_sql != index_sql_:
--> 372 raise NotImplementedError('table.alter cannot alter indexes (yet)')
373 if attribute_sql != attribute_sql_:
374 sql.extend(_make_attribute_alter(attribute_sql, attribute_sql_, primary_key))
NotImplementedError: table.alter cannot alter indexes (yet)
I am trying to access a PostgreSQL database via psycopg2 from my Jupyter notebook, but I receive the following error message.
OperationalError: could not create SSL context: no such file
import pandas as pd
import psycopg2
#Connect to postgres
conn_string = "host='xx' sslmode='require' \
dbname='dbname' port='xx' user='xx' \
password='xx'"
#Create rework dataset
conn = psycopg2.connect(conn_string)
postgreSQL_select_Query = u'SELECT * FROM "xx"."yy"'
conn.set_client_encoding('UNICODE')
cursor = conn.cursor()
cursor.execute(postgreSQL_select_Query)
colnames = [desc[0] for desc in cursor.description]
df_imp = cursor.fetchall()
df = pd.DataFrame(data=df_imp, columns=colnames)
Expected result is the access to the database and generation of dataframe.
Actual result is OperationalError: could not create SSL context: no such file at the step conn = psycopg2.connect(conn_string)
---------------------------------------------------------------------------
OperationalError Traceback (most recent call last)
<ipython-input-2-932b2fb01c9f> in <module>
5
6 #Create rework dataset
----> 7 conn = psycopg2.connect(conn_string)
8 postgreSQL_select_Query = u'SELECT * FROM "xx"."xx"'
9 conn.set_client_encoding('UNICODE')
~\AppData\Local\Continuum\anaconda3\lib\site-packages\psycopg2\__init__.py in connect(dsn, connection_factory, cursor_factory, **kwargs)
128
129 dsn = _ext.make_dsn(dsn, **kwargs)
--> 130 conn = _connect(dsn, connection_factory=connection_factory, **kwasync)
131 if cursor_factory is not None:
132 conn.cursor_factory = cursor_factory
OperationalError: could not create SSL context: No such process
After trying several solutions, I found that the problem was the version of the psycopg2 library.
conda update does not install the latest version of the library. However, pip does, and with that version my code works again!