Django headache with simple non-ascii string - unicode

I just created the following model:
class Categoria(models.Model):
    """A category with a name and an optional parent category."""

    # Name of the category, limited to 30 characters.
    nombre=models.CharField(max_length=30)
    # Optional self-referential link to a parent Categoria
    # (blank=True / null=True allow it to be left empty).
    padre=models.ForeignKey('self', blank=True, null=True)

    def __unicode__(self):
        # Returns the stored name as-is; no explicit decoding is done here.
        return self.nombre
Then registered to the admin interface and syncdb'd
Everything is OK if I just add plain ASCII chars. But if I add a "Categoria" named "á" (for example) I get:
Environment:
Request Method: GET
Request URL: http://192.168.2.103:8000/administracion/locales/categoria/
Django Version: 1.1.1
Python Version: 2.6.4
Installed Applications:
['django.contrib.auth',
'django.contrib.contenttypes',
'django.contrib.sessions',
'django.contrib.admin',
'cruzandoelsuquiaDJ.locales']
Installed Middleware:
('django.middleware.common.CommonMiddleware',
'django.contrib.sessions.middleware.SessionMiddleware',
'django.contrib.auth.middleware.AuthenticationMiddleware')
Template error:
In template /usr/lib/pymodules/python2.6/django/contrib/admin/templates/admin/change_list.html, error at line 78
Caught an exception while rendering: ('ascii', '\xc3\xa1', 0, 1, 'ordinal not in range(128)')
68 : {% endif %}
69 : {% endblock %}
70 :
71 : <form action="" method="post"{% if cl.formset.is_multipart %} enctype="multipart/form-data"{% endif %}>
72 : {% if cl.formset %}
73 : {{ cl.formset.management_form }}
74 : {% endif %}
75 :
76 : {% block result_list %}
77 : {% if action_form and actions_on_top and cl.full_result_count %}{% admin_actions %}{% endif %}
78 : {% result_list cl %}
79 : {% if action_form and actions_on_bottom and cl.full_result_count %}{% admin_actions %}{% endif %}
80 : {% endblock %}
81 : {% block pagination %}{% pagination cl %}{% endblock %}
82 : </form>
83 : </div>
84 : </div>
85 : {% endblock %}
86 :
Traceback:
File "/usr/lib/pymodules/python2.6/django/core/handlers/base.py" in get_response
92. response = callback(request, *callback_args, **callback_kwargs)
File "/usr/lib/pymodules/python2.6/django/contrib/admin/options.py" in wrapper
226. return self.admin_site.admin_view(view)(*args, **kwargs)
File "/usr/lib/pymodules/python2.6/django/views/decorators/cache.py" in _wrapped_view_func
44. response = view_func(request, *args, **kwargs)
File "/usr/lib/pymodules/python2.6/django/contrib/admin/sites.py" in inner
186. return view(request, *args, **kwargs)
File "/usr/lib/pymodules/python2.6/django/contrib/admin/options.py" in changelist_view
986. ], context, context_instance=context_instance)
File "/usr/lib/pymodules/python2.6/django/shortcuts/__init__.py" in render_to_response
20. return HttpResponse(loader.render_to_string(*args, **kwargs), **httpresponse_kwargs)
File "/usr/lib/pymodules/python2.6/django/template/loader.py" in render_to_string
108. return t.render(context_instance)
File "/usr/lib/pymodules/python2.6/django/template/__init__.py" in render
178. return self.nodelist.render(context)
File "/usr/lib/pymodules/python2.6/django/template/__init__.py" in render
779. bits.append(self.render_node(node, context))
File "/usr/lib/pymodules/python2.6/django/template/debug.py" in render_node
71. result = node.render(context)
File "/usr/lib/pymodules/python2.6/django/template/loader_tags.py" in render
97. return compiled_parent.render(context)
File "/usr/lib/pymodules/python2.6/django/template/__init__.py" in render
178. return self.nodelist.render(context)
File "/usr/lib/pymodules/python2.6/django/template/__init__.py" in render
779. bits.append(self.render_node(node, context))
File "/usr/lib/pymodules/python2.6/django/template/debug.py" in render_node
71. result = node.render(context)
File "/usr/lib/pymodules/python2.6/django/template/loader_tags.py" in render
97. return compiled_parent.render(context)
File "/usr/lib/pymodules/python2.6/django/template/__init__.py" in render
178. return self.nodelist.render(context)
File "/usr/lib/pymodules/python2.6/django/template/__init__.py" in render
779. bits.append(self.render_node(node, context))
File "/usr/lib/pymodules/python2.6/django/template/debug.py" in render_node
71. result = node.render(context)
File "/usr/lib/pymodules/python2.6/django/template/loader_tags.py" in render
24. result = self.nodelist.render(context)
File "/usr/lib/pymodules/python2.6/django/template/__init__.py" in render
779. bits.append(self.render_node(node, context))
File "/usr/lib/pymodules/python2.6/django/template/debug.py" in render_node
71. result = node.render(context)
File "/usr/lib/pymodules/python2.6/django/template/loader_tags.py" in render
24. result = self.nodelist.render(context)
File "/usr/lib/pymodules/python2.6/django/template/__init__.py" in render
779. bits.append(self.render_node(node, context))
File "/usr/lib/pymodules/python2.6/django/template/debug.py" in render_node
81. raise wrapped
Exception Type: TemplateSyntaxError at /administracion/locales/categoria/
Exception Value: Caught an exception while rendering: ('ascii', '\xc3\xa1', 0, 1, 'ordinal not in range(128)')
My Django version is 1.1 and my database is MySQL 5.1.37-1ubuntu5 with the utf8 charset, and the table is using the utf8_bin collation.
This problem seems too basic to be true, and I'm a django newbie so I'm sorry in advance if I'm missing something very simple :)

Django generally has very good Unicode support (see the Django 1.1 "Unicode data" documentation for details). In my code I find that, if I'm having a problem with simple Unicode features, the problem usually is that I'm not understanding Django's details well, not that Django has a bug in its Unicode support.
The "Unicode Data" page tells us that "All of Django’s database backends ... automatically convert strings retrieved from the database into Python Unicode strings. You don’t even need to tell Django what encoding your database uses: that is handled transparently." So your simple return self.nombre should return a Python Unicode string.
However, the Django 1.1 "Databases" page has an important note about how the MySQL backend handles the utf8_bin collation:
...if you really want case-sensitive
comparisons on a particular column or
table, you would change the column or
table to use the utf8_bin collation.
The main thing to be aware of in this
case is that if you are using MySQLdb
1.2.2, the database backend in Django will then return bytestrings (instead
of unicode strings) for any character
fields it receives from the
database. This is a strong variation
from Django's normal practice of
always returning unicode strings. It
is up to you, the developer, to handle
the fact that you will receive
bytestrings if you configure your
table(s) to use utf8_bin collation.
Django itself should work smoothly
with such columns, but your code
must be prepared to call
django.utils.encoding.smart_unicode()
at times if it really wants to work
with consistent data ...
So, in your original example, the column "nombre" used utf8_bin collation. This meant that self.nombre was returning a Python byte string. When you put it in an expression that required a Python Unicode string, Python performed its default conversion. This is the equivalent of self.nombre.decode('ascii'). And of course, .decode('ascii') fails when it encounters any byte above 0x7F, such as the UTF-8 bytes which encode "á".
You discovered the two ways to solve this problem. The first is to convert the Python byte string returned by self.nombre into a Python Unicode string explicitly. I'll bet the following simpler code would have worked:
return self.nombre.decode('utf8')
The second approach is to change the MySQL collation for column "nombre", which causes Django's MySQL backend to return Python Unicode strings instead of the unusual byte strings. Then your original expression gives a Python Unicode string:
return self.nombre
Hope this helps.

This problem can be solved by changing Django's code a little.
Add the code below to django/utils/encoding.py
import sys
reload(sys)
sys.setdefaultencoding('utf-8')

I had this issue in production and never on the development server.
Then I realized that new tables were created with utf8_bin collation instead of utf8_general_ci.
To see which tables require conversion, type
SHOW TABLE STATUS;
Then convert those with utf8_bin collation by typing
ALTER TABLE app_table CONVERT TO CHARACTER SET utf8 COLLATE utf8_general_ci;
Finally, change the default collation so this doesn't happen again:
ALTER DATABASE my_database character set utf8 collate utf8_general_ci;

Ok...
return u"%s"%(self.nombre.decode('utf8'),)
does the trick.
But also found that changing utf8_bin to utf8_general_ci does the trick, i.e. self.nombre works as expected.

I solved my problem by simply creating an Admin Model for the Model and including all variables in "list_display".

I had a similar issue when I recently changed a MySQL table to use the utf8_bin collation in staging, while there was no problem in dev (python2.7, Django1.4.2 in both environments). I found out that in dev I have MySQL-python 1.2.4c1 and in staging I have 1.2.3. Upgrading to MySQL-python 1.2.4 solved the problem for me.

Related

html2pdf not showing character correctly, encoding for ē

I'm struggling with some characters in a PDF I'm trying to create with html2pdf. The following code creates the PDF, but ē is shown as an e.
$html2pdf=new Html2Pdf();
$html2pdf->writeHTML('<h1>Fēnix</h1>');
$html2pdf->output();
When getting the name from my database, ē is shown as a ?.
$query=$mysqli->query('SELECT name FROM table WHERE id=1;');
$result=$query->fetch_assoc();
$html2pdf=new Html2Pdf();
$html2pdf->writeHTML('<h1>'.$result['name'].'</h1>');
$html2pdf->output();
This is the way I connect to my database:
$mysqli=new mysqli('host', 'user', 'pass', 'db');
I have also tried adding a charset:
$mysqli->set_charset('utf8');
Or initiating the class with parameters:
$html2pdf=new Html2Pdf('P', 'A4', 'nl');
$html2pdf=new Html2Pdf('P', 'A4', 'nl', true, 'UTF8');
Other characters that are giving issues are: Ś ą ł ś
Both server and database are UTF-8.
The solution is to apply a UTF-8 font to all elements.
* { font-family:freeserif; }

Flask form post: cannot submit Chinese characters, only English words work

I cannot submit Chinese words through the form.
English words are OK.
How can I set the encoding to UTF-8?
Should it be set in the HTML code or in app.py?
Here is my page: http://shiqiu.pw/testpage
PS: I can submit Chinese words on my Mac with no problem. But when I deploy my code to my server, Chinese words cannot be submitted and it shows an Internal Server Error.
PPS: I send the posted words to MySQL and then show the MySQL data on a new page; perhaps there is something I need to configure in MySQL?
#myapp.py code part
with connection.cursor() as cursor:
# Create a new record
word = request.form.get('word')
print('word')
print(word)
meaning = request.form.get('meaning')
sql = 'INSERT INTO sqdict (word, meaning) VALUES (%s, %s)'
cursor.execute(sql, (word, meaning)) # execute
# connection is not autocommit by default. So you must commit to save your changes.
connection.commit()
with connection.cursor() as cursor:
sql = 'SELECT * from sqdict'
cursor.execute(sql)
# sqlresult = cursor.fetchone() # only show the first row
sqlresult = cursor.fetchall() # all rows
print('sqlresult')
print(sqlresult)
allwords = sqlresult
ERROR LOG BELOW
word
工作
ERROR:flask.app:Exception on /test [POST]
Traceback (most recent call last):
File "/srv/data/web/vhosts/default/local/lib/python3.7/site-packages/flask/app.py", line 1993, in make_response
rv = self.response_class.force_type(rv, request.environ)
File "/srv/data/web/vhosts/default/local/lib/python3.7/site-packages/werkzeug/wrappers/base_response.py", line 269, in force_type
response = BaseResponse(*_run_wsgi_app(response, environ))
File "/srv/data/web/vhosts/default/local/lib/python3.7/site-packages/werkzeug/test.py", line 1119, in run_wsgi_app
app_rv = app(environ, start_response)
TypeError: 'InternalError' object is not callable
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/srv/data/web/vhosts/default/local/lib/python3.7/site-packages/flask/app.py", line 2311, in wsgi_app
response = self.full_dispatch_request()
File "/srv/data/web/vhosts/default/local/lib/python3.7/site-packages/flask/app.py", line 1835, in full_dispatch_request
return self.finalize_request(rv)
File "/srv/data/web/vhosts/default/local/lib/python3.7/site-packages/flask/app.py", line 1850, in finalize_request
response = self.make_response(rv)
File "/srv/data/web/vhosts/default/local/lib/python3.7/site-packages/flask/app.py", line 2001, in make_response
reraise(TypeError, new_error, sys.exc_info()[2])
File "/srv/data/web/vhosts/default/local/lib/python3.7/site-packages/flask/_compat.py", line 35, in reraise
raise value.with_traceback(tb)
File "/srv/data/web/vhosts/default/local/lib/python3.7/site-packages/flask/app.py", line 1993, in make_response
rv = self.response_class.force_type(rv, request.environ)
File "/srv/data/web/vhosts/default/local/lib/python3.7/site-packages/werkzeug/wrappers/base_response.py", line 269, in force_type
response = BaseResponse(*_run_wsgi_app(response, environ))
File "/srv/data/web/vhosts/default/local/lib/python3.7/site-packages/werkzeug/test.py", line 1119, in run_wsgi_app
app_rv = app(environ, start_response)
TypeError: 'InternalError' object is not callable
The view function did not return a valid response. The return type must be a string, tuple, Response instance, or WSGI callable, but it was a InternalError.
My Chinese input is shown as '工作'.
I don't know which object is the InternalError.
The full code is on GitHub; it may not be exactly the same, but the differences are small.
I have tried request.form.getunicode('word').
I also tried app.config['JSON_AS_ASCII'] = False.
And I tried setting <meta charset="UTF-8"> in the HTML code.
But none of these works.
I would like the form to support Chinese words as values.

chatterbot twitter_trainer ASCII encoding error

I am trying to run the chatterbot's TwitterTrainer on a separate program like so:
from chatterbot import ChatBot
from chatterbot.trainers import TwitterTrainer
from settings import TWITTER
import logging
# Comment out the following line to disable verbose logging
logging.basicConfig(level=logging.INFO)
# Build the bot: BestMatch logic adapter, terminal input/output adapters,
# the ./twitter-database.db database, Twitter API credentials taken from the
# TWITTER mapping imported from settings, and TwitterTrainer as the trainer.
chatbot = ChatBot("TwitterBot",
logic_adapters=[
"chatterbot.logic.BestMatch"
],
input_adapter="chatterbot.input.TerminalAdapter",
output_adapter="chatterbot.output.TerminalAdapter",
database="./twitter-database.db",
twitter_consumer_key=TWITTER["CONSUMER_KEY"],
twitter_consumer_secret=TWITTER["CONSUMER_SECRET"],
twitter_access_token_key=TWITTER["ACCESS_TOKEN"],
twitter_access_token_secret=TWITTER["ACCESS_TOKEN_SECRET"],
trainer="chatterbot.trainers.TwitterTrainer",
random_seed_word="random"
)
# Run training using the trainer configured above.
chatbot.train()
# Only reached if train() returned without raising.
chatbot.logger.info('Trained database generated successfully!')
And I get errors that look like this:
File "C:\Python27\lib\json\decoder.py", line 364, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "C:\Python27\lib\json\decoder.py", line 380, in raw_decode
obj, end = self.scan_once(s, idx)
UnicodeDecodeError: 'utf8' codec can't decode byte 0x85 in position 94: invalid start byte
This program doesn't run more than 3 seconds straight, but some tweets are written to the twitter-database.db until exception occurs.
Also when looking at the trainer.py i saw this:
# TODO: Handle non-ascii characters properly
Any ideas about why this happens and how can i fix this?
Could you try adding a Python source code encoding declaration, # -*- coding: utf-8 -*-, at the top of your file? This type of error can occur when it is missing. More information is available here: http://chatterbot.readthedocs.io/en/stable/encoding.html#fixing-encoding-errors

PHPUnit - assertTablesEqual encoding accents

I'm using:
PHPUnit 3.6.12 / PHP 5.3.1 / MySQL 5.1.30
I'm trying to compare the value inserted by a function in a database with the value I expect.
The value is a string CONTAINING ACCENTS.
So I created an XML file: expectedValue.xml (file encoded in UTF-8)
<?xml version="1.0" encoding="UTF-8"?>
<dataset>
<table name="MyTable">
<column>MyColumn</column>
<row>
<value>résumé</value>
</row>
</table>
</dataset>
Here is the code in the test method (file encoded in UTF-8 too)
/**
 * Checks that the value written by save() matches the expected data set.
 *
 * Reads MyColumn from MyTable through the test's database connection and
 * compares it with the MyTable rows declared in expectedValue.xml.
 */
public function testSave()
{
    // this function saves the data in an UTF-8 database
    save('résumé');
    // Actual table contents as currently stored in the database.
    $queryTable = $this->getConnection()->createQueryTable('MyTable', 'SELECT MyColumn FROM MyTable');
    // Expected table contents loaded from the XML fixture.
    $expectedTable = $this->createXMLDataSet('expectedValue.xml')->getTable('MyTable');
    $this->assertTablesEqual($expectedTable, $queryTable);
}
And here is the result I get:
Failed asserting that
MYTable
MyColumn
résumé
is equal to expected
MyTable
MyColumn
résumé
Does anyone know where this encoding problem may come from ??
Thanks !!
Could possibly be the database connection
When you're connecting to MySQL (in your getConnection() method), you need to make sure you explicitly set UTF-8.
$pdo = new PDO(
'mysql:host=hostname;dbname=defaultDbName',
'username',
'password',
array(PDO::MYSQL_ATTR_INIT_COMMAND => "SET NAMES utf8")
);
If you're not using MySQL, you can search for ways to set the charset.

Can tornado handle pagination?

Django REST framework lets you set up pagination.
When we make a request it returns a JSON object, with a 'paging' attribute that has 'next' and/or 'previous' attributes containing urls of other pages (if they exist).
Does Tornado provide an option to set Pagination?
I have searched the web for tornado pagination example, but could not find any.
Any help is appreciated.
Thanking you in advance.
Nope, because pagination needs to work pretty closely with the database, and Tornado is DB agnostic.
Here's a UI module that I use; you need a count of your total results, as well as the results for the page (this doesn't limit those results for you, it just builds the pagination links):
from __future__ import division
import math
import urlparse
import urllib
import tornado.web
def update_querystring(url, **kwargs):
    """Return *url* with its query string updated from *kwargs*.

    Each keyword argument sets (or replaces) the query parameter of the same
    name. A value of ``None`` removes that parameter instead. Scheme, host,
    path and fragment of *url* are preserved.
    """
    # Resolve the URL helpers locally so the function works with both the
    # Python 3 and the Python 2 standard-library layouts.
    try:
        from urllib.parse import parse_qs, urlencode, urlsplit, urlunsplit
    except ImportError:  # Python 2
        from urllib import urlencode
        from urlparse import parse_qs, urlsplit, urlunsplit

    parts = urlsplit(url)
    query_args = parse_qs(parts.query)
    query_args.update(kwargs)
    for arg_name, arg_value in kwargs.items():
        if arg_value is None:
            # None means "drop this parameter" rather than "?name=None".
            query_args.pop(arg_name, None)
    # doseq=True expands the value lists produced by parse_qs into
    # repeated ``name=value`` pairs.
    query_string = urlencode(query_args, True)
    return urlunsplit((parts.scheme, parts.netloc, parts.path,
                       query_string, parts.fragment))
class Paginator(tornado.web.UIModule):
    """UI module that renders a row of pagination links."""

    def render(self, page, page_size, results_count):
        """Render ``uimodules/pagination.html`` for the current page.

        ``page`` is the current page number, ``page_size`` the number of
        results per page and ``results_count`` the total number of results.
        """
        # A falsy results_count means there are no pages at all; otherwise
        # round the (true-division) quotient up to a whole number of pages.
        if results_count:
            pages = int(math.ceil(results_count / page_size))
        else:
            pages = 0

        def get_page_url(page):
            # don't allow ?page=1 -- the first page drops the argument
            target = None if page <= 1 else page
            return update_querystring(self.request.uri, page=target)

        following = page + 1 if page < pages else None
        preceding = page - 1 if page > 1 else None
        return self.render_string(
            'uimodules/pagination.html',
            page=page,
            pages=pages,
            next=following,
            previous=preceding,
            get_page_url=get_page_url,
        )
Here's the module template (uimodules/pagination.html in the above example):
{# Pagination links; rendered only when there is more than one page. #}
{% if pages > 1 %}
<div class="pagination pagination-centered">
  <ul>
    {# "Previous" arrow: a link when a previous page exists, otherwise disabled. #}
    <li{% if previous %}><a href="{{ get_page_url(previous) }}">«</a></li>{% else %} class="disabled"><span>«</span></li>{% end %}
    {% for page_num in xrange(1, pages + 1) %}{# 1-index range #}
    <li{% if page_num != page %}><a href="{{ get_page_url(page_num) }}">{{ page_num }}</a></li>{% else %} class="active"><span>{{ page_num }}</span></li>{% end %}
    {% end %}
    {# "Next" arrow: a link when a next page exists, otherwise disabled. #}
    <li{% if next %}><a href="{{ get_page_url(next) }}">»</a></li>{% else %} class="disabled"><span>»</span></li>{% end %}
  </ul>
</div>
{% end %}
Don't forget to tell your tornado app about the module.
Finally, to actually use it:
{% for result in results %}
<p>{{ result }}</p>
{% end %}
{% module Paginator(page, page_size, results_count) %}
Hope that helps!