convert html to json using rdd.map - pyspark

I have an HTML file that I want to parse in PySpark.
Example:
<MainStruct Rank="1">
<Struct Name="A">
<Struct Name="AA">
<Struct Name="AAA">
<Field Name="F1">Data</Field>
</Struct>
<Struct Name="ListPart">
<List Name="ListName">
<Struct Name="S1">
<Field Name="F1">AAA</Field>
<Field Name="F2">BBB</Field>
<Field Name="F3">CCC</Field>
</Struct>
<Struct Name="S1">
<Field Name="F1">XXX</Field>
<Field Name="F2">GGG</Field>
<Field Name="F3">BBB</Field>
</Struct>
</List>
</Struct>
</Struct>
</Struct>
</MainStruct>
# Read every file under path_to_XML whole; the resulting pairs are named
# (filename, content) in the DataFrame below.
rdd_html = spark.sparkContext.wholeTextFiles(path_to_XML, minPartitions=1000, use_unicode=True)
df_html = spark.createDataFrame(rdd_html,['filename', 'content'])
# Convert each file's content into a nested dict, starting at the 'mainstruct' tag.
rdd_map = df_html.rdd.map(lambda x: xmltodict(x['content'],'mainstruct'))
# No schema is passed here, so Spark has to infer the column types from the dicts.
df_map = spark.createDataFrame(rdd_map)
df_map.display()
But in my notebook output the list elements are parsed incorrectly:
>object
>AA:
>ListPart:
ListName: "[{S1={F1=AAA, F2=BBB, F3=CCC}}, {S1={F1=XXX, F2=GGG, F3=BBB}}]"
>AAA:
F1: "Data"
The list element is represented as a single string.
My function to parse it:
def xmltodict(content, first_tag=''):
    """Parse markup text and convert it to nested dicts/lists via ``parser``.

    Args:
        content: Raw markup text of one file.
        first_tag: Name of the element to start from, searched below
            ``<body>``. When blank, the whole ``<body>`` is converted.
            NOTE: Beautiful Soup's HTML parsers lower-case tag and
            attribute names, so pass the tag name in lower case.

    Returns:
        Whatever ``parser`` returns for the selected element.

    Raises:
        ValueError: If the parsed document has no ``<body>`` or
            ``first_tag`` cannot be found inside it.
    """
    # Remove line breaks, then whitespace-only gaps between tags, so that no
    # stray text nodes end up between elements.
    content = re.sub(r'[\r\n]', '', content)
    content = re.sub(r'>\s+<', '><', content)
    data = unicodedata.normalize('NFKD', content)
    soup = BeautifulSoup(data, 'lxml')
    body = soup.find('body')
    if body is None:
        raise ValueError('document has no <body> element to parse')
    if not first_tag.strip():
        return parser(body)
    struct = body.find(first_tag)
    if struct is None:
        raise ValueError('tag {!r} not found in document'.format(first_tag))
    return parser(struct)
def parser(struct):
    """Convert the direct child elements of ``struct`` into a nested dict.

    ``struct`` elements become nested dicts, ``field`` elements become their
    text, and ``list`` elements become a list holding one single-key dict per
    child element. Every entry is keyed by the element's ``name`` attribute.

    Args:
        struct: A parsed tag exposing ``find_all``; its children must expose
            ``name``, ``attrs`` and ``text``.

    Returns:
        The nested dict built from the children of ``struct``.

    Raises:
        KeyError: If a child element has no ``name`` attribute.

    Side effects:
        Overwrites ``result.txt`` in the current directory with the JSON
        form of the returned dict.
    """
    struct_dict = _parse_elements(struct.find_all(True, recursive=False))
    # The dump is written once per top-level call rather than once per
    # recursion step; the final file content is the same.
    with open('result.txt', 'w', encoding='utf-8') as file:
        file.write(json.dumps(struct_dict))
    return struct_dict


def _parse_elements(elements):
    """Build ``{name attribute: converted value}`` from an iterable of tags."""
    result = {}
    for element in elements:
        key = element.attrs['name']
        kind = element.name
        if kind == 'struct':
            result[key] = _parse_elements(
                element.find_all(True, recursive=False))
        elif kind == 'field':
            result[key] = element.text
        elif kind == 'list':
            # Each child element becomes its own single-key dict. Walking the
            # already-parsed tree avoids re-parsing every child's markup and
            # skips non-element nodes such as stray text.
            result[key] = [
                _parse_elements([item])
                for item in element.find_all(True, recursive=False)
            ]
    return result
the result in txt file is that I want to receive:
"A": { "AA": {
"AAA": {"F1": "Data"},
"ListPart": {
"ListName": [
{
"S1": {"F1": "AAA",
"F2": "BBB",
"F3": "CCC"
}
},
{
"S1": { "F1": "XXX",
"F2": "GGG",
"F3": "BBB"
}}]
}}}
But in my notebook output the list elements are parsed incorrectly:
>object
>AA:
>ListPart:
ListName: "[{S1={F1=AAA, F2=BBB, F3=CCC}}, {S1={F1=XXX, F2=GGG, F3=BBB}}]"
>AAA:
F1: "Data"
Why is the list represented as a single string? Why are there "=" symbols instead of ":"?

i simplified this issue to that:
def parseList(row):
    """Return a fixed ``Row`` whose ``res`` dict mixes a string and a list of dicts.

    ``row`` is accepted so the function fits ``rdd.map`` but is never read.
    """
    nested = [{'x1': value} for value in ('XA', 'XB')]
    return Row(res={'el1': 'AAA', 'el2': nested})
# Read every file under path_to_file_test whole as (filename, content) pairs.
rdd_html = spark.sparkContext.wholeTextFiles(path_to_file_test, minPartitions=1000, use_unicode=True)
df_html = spark.createDataFrame(rdd_html, ['filename', 'content'])
# Map with parseList, the function defined just above (no parseList2 is defined here).
rdd_map = df_html.rdd.map(parseList)
# No schema is passed, so Spark infers the column types from the returned rows.
df_map = spark.createDataFrame(rdd_map)
df_map.display()
in result i also have
>object
el2: "[{x1=XA}, {x1=XB}]"
el1: "AAA"
not that
>object
>el2
x1:"XA"
x1:"XB"
el1: "AAA"

I finally resolved my problem.
The cause was that I had to define a schema and pass it explicitly:
df_map = spark.createDataFrame(rdd_map,schema)

Related

How do you post form data using pytest?

I'm trying to write a unit test that posts form data. The actual line in question is:
def test_create_request():
    """POST a new request serialised as a JSON body and expect success."""
    endpoint = '/requests/'
    json_headers = {'Content-Type': 'application/json'}
    with app.test_client() as test_client, app.app_context():
        # The dict is turned into one JSON string and sent as the raw body.
        payload = json.dumps(get_new_request(), default=str)
        print('FORM DATA: ', payload)
        response = test_client.post(endpoint, data=payload, headers=json_headers)
        assert response.status_code == 200
        result = json.loads(response.data)
        assert result['success'] == True
Where new_request is a dict representation of an object. The print statement yields (I've formatted it a bit):
FORM DATA: {
"request_id": "6",
"state_id": 1,
"orig_project_id": "1",
"orig_project_code": "QQQ",
"orig_allocated_funding": "123.45",
"orig_ytd_spend": "123.45",
"orig_ytd_commit": "123.45",
"orig_ytd_ocnr": "123.45",
"new_project_id": 2,
"new_funding_amount": 123.45,
"new_ytd_spend": 123.45,
"new_ytd_commit": 123.45,
"new_ytd_ocnr": 123.45,
"plan": "this is the plan",
"reason": "this is the reason",
"sm_director": "sm.dir@example.com",
"submitted_by": "xfgbn@vexample.com",
"created_on": "2021-09-14 16:32:55",
"meets_approval_guidelines": null
}
In the flask form, most fields are required. When I try to post the data, the form.validate_on_submit() function in the view's route returns False, and the 2nd assert fails. WTForms claims that none of the required orig_allocated_funding, orig_ytd_spend, orig_ytd_commit, orig_ytd_ocnr, new_project_id, new_funding_amount, new_ytd_spend, new_ytd_commit, new_ytd_ocnr, reason, plan, sm_director, or submitted_by fields are supplied.
I've read several tutorials, and I can't see what I'm doing wrong. Can anyone help?
What I was able to make work was scrapping form_data altogether and posting the dict directly:
def test_create_request():
    """POST the new-request dict directly as form data and expect success."""
    endpoint = '/requests/'
    with app.test_client() as test_client, app.app_context():
        # Passing the dict itself lets the test client encode it as form fields.
        response = test_client.post(endpoint, data=get_new_request())
        assert response.status_code == 200
        result = json.loads(response.data)
        print(result['html'])
        assert result['success'] == True

computed field that does not compute and default that does not work (odoo-12)

I have this field with a method in my model which aims to detect the changes in the many2one:
manytoone = fields.Many2one(comodel_name="product.template", string="Many to One",
required=False, )
cambio_realizado = fields.Boolean(string="Cambios en Many to One", default=False)
@api.onchange('manytoone')
def _detectar_cambio(self):
    """Set ``cambio_realizado`` whenever the ``manytoone`` onchange fires."""
    # The flag is set unconditionally, whatever value ``manytoone`` now holds.
    self.cambio_realizado = True
The default=False on the Boolean field does not work: the field always starts as True.
The other problem is that a computed field does not work:
@api.multi
def computer_function(self):
    """Assign the constant result to ``computation`` on every record."""
    # Iterate so the method also works on recordsets with more than one record.
    for record in self:
        record.computation = 243+234

computation = fields.Float(string="Result: ", compute=computer_function, readonly=True)
The result shown in the view is always zero.
The indentation is OK, and the model is inherited.
My Class and my view with the tips:
# Extends sale.order with demo relational fields, an onchange-driven flag and
# a computed field.
class CustomModule(models.Model):
    _inherit = 'sale.order'

    name = fields.Char('campos relacionales, decoradores onchange y funcion computar')
    manytoone = fields.Many2one(comodel_name="product.template", string="Many to One",
                                required=False, )
    cambio_realizado = fields.Boolean(string="Cambios en Many to One")

    @api.onchange('manytoone')
    def _detectar_cambio(self):
        """Flag the record once ``manytoone`` holds a value."""
        # The onchange also runs when a new, empty form is opened, so only
        # flag records where a value is actually selected.
        for record in self:
            if record.manytoone:
                record.cambio_realizado = True

    # NOTE(review): inverse_name points at ``manytoone``, whose comodel is
    # product.template rather than sale.order -- confirm this is intended.
    onetomany = fields.One2many(comodel_name="sale.order", inverse_name="manytoone",
                                string="One to Many", required=False, )
    manytomany = fields.Many2many(comodel_name="sale.order",
                                  relation="sale_handler",
                                  column1="order_id", column2="order_handler_id",
                                  string="Many to Many", )

    @api.multi
    def computer_function(self):
        """Assign the constant value to ``computation`` on every record."""
        # The value depends on no other field, so no ``api.depends`` is
        # declared; in particular the field must not depend on itself.
        for record in self:
            record.computation = 300

    computation = fields.Float(string="Result: ", compute=computer_function, readonly=True)
<odoo>
<!-- Inherit Form View to Modify it -->
<record id="custom_view_custom" model="ir.ui.view">
<field name="name">custom.view.custom</field>
<field name="model">sale.order</field>
<field name="inherit_id" ref="sale.view_order_form"/>
<field name="arch" type="xml">
<xpath expr="//sheet" position="inside">
<group>
<field name="manytoone"/>
<field name="onetomany"/>
<field name="manytomany"/>
</group>
<group>
<field name="computation"/>
<field name="cambio_realizado"/>
</group>
</xpath>
</field>
</record>
</odoo>
@api.one
def computer_function(self):
    """Assign the constant result to ``computation`` on the current record."""
    # ``api.one`` calls this once per record, so ``self`` is a single record here.
    self.computation = 243+234

computation = fields.Float(string="Result: ", compute=computer_function, readonly=True)
Use @api.one in your code.
1- onchange: when you create a new record, the onchange is called automatically, so cambio_realizado is set to True. Guard it with a check on the field:
@api.onchange('manytoone')
def _detectar_cambio(self):
    """Flag each record whose ``manytoone`` field holds a value."""
    for o in self:
        # Skip records where the field is empty.
        if o.manytoone:
            o.cambio_realizado = True
2- For a computed field it is preferable to use api.depends('somefield'), but the code below may work for you: use a loop and don't set readonly.
@api.multi
def computer_function(self):
    """Assign the constant result to ``computation`` on every record."""
    for o in self:
        o.computation = 243+234

computation2 = fields.Float(string="Result: ", compute=computer_function)

Odoo 10 : Failed to render mail template

I have created a mail template in Odoo 10 and call it from a cron task. The problem is that it doesn't work and gives me an error; here is the log: https://pastebin.com/c7zCXbxF. Could you help me?
Here is the mail template:
<odoo>
<data>
<record id="crm_lead_reminder" model="mail.template">
<field name="name">Rappel sur le pipeline</field>
<field name="email_from">admin@example.com</field>
<field name="subject">Rappel ${object.type} ${object.name} </field>
<field name="email_to">${object.user_id.partner_id.email}</field>
<field name="model_id" ref="sale_cron.model_crm_lead"/>
<field name="auto_delete" eval="True"/>
<field name="body_html">
<![CDATA[
<p>
hello world
</p>
]]>
</field>
</record>
</data>
And here is the method that calls it:
# Extends crm.lead with a cron entry point that mails a reminder for every
# lead whose next action date is today or tomorrow.
class sale_cron(models.Model):
    _inherit = 'crm.lead'

    def _trigger_action(self, date_action, current_date):
        """Tell whether ``date_action`` falls on ``current_date`` or the day after.

        Args:
            date_action: Date as a ``'YYYY-MM-DD'`` string, or a falsy value
                when no date is set.
            current_date: Object exposing ``year``, ``month`` and ``day``.

        Returns:
            0 when a reminder is due, -1 otherwise.
        """
        if not date_action:
            return -1
        year, month, day = [int(part) for part in date_action.split('-')]
        action_day = datetime.date(year, month, day)
        today = datetime.date(current_date.year, current_date.month, current_date.day)
        # Date arithmetic keeps "tomorrow" correct across month and year ends.
        if action_day in (today, today + datetime.timedelta(days=1)):
            return 0
        return -1

    def _check_crm_lead(self):
        """Send the reminder template for every due lead outside stage 4.

        Returns:
            1 once all matching leads have been processed.
        """
        current_date = datetime.datetime.now()
        template = self.env.ref('sale_cron.crm_lead_reminder')
        for crm_lead_id in self.search([('stage_id', '!=', 4)]):
            if self._trigger_action(crm_lead_id.date_action, current_date) == -1:
                # Not due: move on to the next lead instead of ending the run.
                continue
            # The template's model is crm.lead, so it is rendered for the lead id.
            template.send_mail(crm_lead_id.id, force_send=True, raise_exception=True)
        return 1
ps: the module name is sale_cron.
Try changing your code like this
template.send_mail(crm_lead_id.id, force_send=True, raise_exception=True)
def _check_crm_lead(self):
    """Send the reminder template for every due lead outside stage 4.

    Returns:
        1 once all matching leads have been processed.
    """
    current_date = datetime.datetime.now()
    template = self.env.ref('sale_cron.crm_lead_reminder')
    for crm_lead_id in self.search([('stage_id', '!=', 4)]):
        i = self._trigger_action(crm_lead_id.date_action, current_date)
        if i == -1:
            # Not due: skip this lead rather than returning, which would stop
            # the remaining leads from being checked.
            continue
        template.send_mail(crm_lead_id.id, force_send=True, raise_exception=True)
    return 1

How to send email notification when Project Issue was created? Odoo 9

Hi, I have created a new issue but I do not get an email notification.
How do you do that?
Edited
I've added the code below, but I cannot get message_follower_ids from self to send the emails.
# Extension of project.issue whose create() currently only delegates to the
# parent implementation.
class project_issue(osv.osv):
    _inherit = 'project.issue'
    _columns = {}

    def create(self, cr, uid, vals, context=None):
        """Create the issue via the parent class and return the parent's result."""
        return super(project_issue, self).create(cr, uid, vals, context=context)
Updated
I updated the code to get the followers' email addresses and the mails are sent successfully, but they all go to one email address.
Also, object.name is the partner's name, but I want it to be the issue name.
def create(self, cr, uid, vals, context=None):
    """Create the issue, then send the notification template once per follower.

    Returns:
        The value returned by the parent ``create``.
    """
    res = super(project_issue, self).create(cr, uid, vals, context=context)
    issue = self.pool.get('project.issue').browse(cr, uid, res, context=context)
    template = self.pool.get('ir.model.data').get_object(cr, uid, 'customized_project', 'email_template_customer_auto')
    for follower in issue.message_partner_ids:
        # NOTE(review): the follower's id is passed as the record id to render;
        # that matches the reported symptom of ``object.name`` being the
        # partner's name -- confirm which record the template should use.
        self.pool.get('mail.template').send_mail(cr, uid, template.id, follower.id, force_send=True, raise_exception=True, context=context)
    # An overridden create() must hand the parent's result back to its caller.
    return res
here is a email template
<?xml version="1.0" encoding="utf-8"?>
<openerp>
<data noupdate="1">
<!--Email template-->
<record id="email_template_customer_auto" model="mail.template">
<field name="name">Send email notification for issue creation</field>
<field name="email_from">${object.company_id and object.company_id.email or ''}</field>
<field name="subject">New Issue created ${object.name}</field>
<field name="email_to">${object.email|safe}</field>
<field name="model_id" ref="model_project_issue"/>
<field name="auto_delete" eval="True"/>
<field name="lang">${object.lang}</field>
<field name="body_html"><![CDATA[
"""
Write here a body of email using HTML tag.....
"""
]]>
</field>
</record>
</data>
</openerp>
I found the solution to send email when an issue is created
from openerp.osv import osv, fields
import logging
# Extension of project.issue that posts a notification to the project
# manager's partner whenever an issue is created.
class project_issue(osv.osv):
    _inherit = 'project.issue'
    _columns = {}
    # Kept so the class exposes the same attributes as before; create() no
    # longer writes to them.
    issue = ''
    templtate = ''

    def create(self, cr, uid, vals, context=None):
        """Create the issue and notify the project manager's partner.

        Returns:
            The value returned by the parent ``create``.
        """
        res = super(project_issue, self).create(cr, uid, vals, context=context)
        # Use a local variable: assigning to ``self`` would store per-call
        # data on the model object itself.
        issue = self.browse(cr, uid, res, context=context)
        manager = issue.project_id.user_id.partner_id.id
        if manager:
            text = "Issue {} has been created".format(issue.name)
            post_vars = {
                'subject': text,
                'body': text,
                'partner_ids': [(4, manager)],
            }
            thread_pool = self.pool.get('mail.thread')
            thread_pool.message_post(cr, uid, False,
                                     context=context,
                                     **post_vars)
        return res

Adding an XML attribute conditionally

I need a way to add an XML attribute 'POSITION' to an XML element 'node' conditionally. Currently I'm doing the condition check first and then creating the node.
if (lvl == 2)
node = <node COLOR={ color } CREATED={ epochTimeMillis } ID={ idGen } POSITION={ position } LINK={ link } MODIFIED={ epochTimeMillis } STYLE="bubble" TEXT={ f.getName() }>
<edge COLOR={ color } STYLE={ style } WIDTH={ width }/>
</node>
else
node = <node COLOR={ color } CREATED={ epochTimeMillis } ID={ idGen } LINK={ link } MODIFIED={ epochTimeMillis } STYLE="bubble" TEXT={ f.getName() }>
<edge COLOR={ color } STYLE={ style } WIDTH={ width }/>
</node>
}
Using "null" is not a good practice, but in this case it would help you:
scala> <root ta={ if (true) "true" else null } fa={ if (false) "false" else null } />
res0: scala.xml.Elem = <root ta="true" ></root>
A slightly cleaner way to do the same thing @senia suggests is:
val posOpt = if (lvl2) Some(myPosition) else None
val xml = <mydata position={posOpt orNull}/>
One way is to create the snippet before:
val pos =
if (lvl == 2) {
"position = ..."
} else {
""
}
and to always insert it in the result.
This could be extended by using an Option with an embedded map in combination with string interpolation.
val pos =
if (lvl == 2) {
Some(position)
} else {
None
}
with
pos.map(v => s"position = $v")