ipython parallel and own class - ipython

I'm working on a class that handles numeric operations to perform with a data array.
Unfortunately I do not get to work by applying a function of the class to the created direct view. I get the error:
, copy)
163 assert len(bufs) >= 2, "not enough buffers!"
164 pf = buffer_to_bytes_py2(bufs.pop(0))
--> 165 f = uncan(pickle.loads(pf), g)
166 pinfo = buffer_to_bytes_py2(bufs.pop(0))
167 info = pickle.loads(pinfo)
AttributeError: Can't get attribute 'calcParallel' on <IPython.core.interactiveshell.DummyMod object at 0x00000000047E4C50>
and the class:
import numpy as np
import ipyparallel as parallel
class calcParallel():
def __init__(self):
self.data = np.random.rand(10,23)
def calc(self,variables):
#parallel view
rc = parallel.Client()
dview = rc.direct_view()
dview.block = False
#Serial
self.res_serial = [self.__multiply(var) for var in variables]
#Parallel
imports = [
'import numpy as np'
]
#imports
[dview.execute(imp) for imp in imports]
#shared data
dview['data'] = self.data
#run calculation
self.pr_list = [dview.apply_async(self.__multiply, var) for var in variables]
dview.wait(self.pr_list)
#process results
self.res_parallel = []
for r in self.pr_list:
self.res_parallel.append(r.get())
def __multiply(self, num):
return data*num
t = calcParallel()
t.calc(np.random.rand(3))
Pleace help me with my problem and sorry for the bad english

f = uncan(pickle.loads(pf), g), It seems the calc function is not the right attribute for pickle, consider to define the calc outside the Class calcParallel

Related

Use generator with ruamel.yaml

I would like to have a bunch of generators in my config dict. So I tried this:
#yaml.register_class
class UniformDistribution:
yaml_tag = '!uniform'
#classmethod
def from_yaml(cls, a, node):
for x in node.value:
if x[0].value == 'min':
min_ = float(x[1].value)
if x[0].value == 'max':
max_ = float(x[1].value)
def f():
while True:
yield np.random.uniform(min_, max_)
g = f()
return g
However, the parser never returns because generators are used internally to resolve reference like &A and *A. Therefore, something like returning (g,) is a fairly simple workaround, but I would prefer a solution where I don't need the additional and very confusing index 0 term in next(config['position_generator'][0]).
Any Ideas?
This wrapper adapted from a different question did exactly what I was looking for.
class GeneratorWrapper(Generator):
def __init__(self, function, *args):
self.function = function
self.args = args
def send(self, ignored_arg):
return self.function(*self.args)
def throw(self, typ=None, val=None, tb=None):
raise StopIteration
#yaml.register_class
class UniformDistribution:
yaml_tag = '!uniform'
#classmethod
def from_yaml(cls, constructor, node):
for x in node.value:
value = float(x[1].value)
if x[0].value == 'min':
min_ = value
if x[0].value == 'max':
max_ = value
return GeneratorWrapper(np.random.uniform, min_, max_)

How to test function depended on another one in python

I need to test this type of code bellow:
list = [1,2,3,4]
def getData(list):
return list[0] + list[1]
def processData():
data = getData(list)
multiply = data*data
return multiply
def test_functions():
assert getData([0,1]) == 1
assert processData() == 1
How to tell the test I need data = getData([0,1]), so basically replace data with my test values.

On the performance of copying case classes

I have two case classes: addSmall and addBig.
addSmall contains only one field.
addBig contains several fields.
case class AddSmall(set: Set[Int] = Set.empty[Int]) {
def add(e: Int) = copy(set + e)
}
case class AddBig(set: Set[Int] = Set.empty[Int]) extends Foo {
def add(e: Int) = copy(set + e)
}
trait Foo {
val a = "a"; val b = "b"; val c = "c"; val d = "d"; val e = "e"
val f = "f"; val g = "g"; val h = "h"; val i = "i"; val j = "j"
val k = "k"; val l = "l"; val m = "m"; val n = "n"; val o = "o"
val p = "p"; val q = "q"; val r = "r"; val s = "s"; val t = "t"
}
A quick benchmark using JMH shows that copying addBig objects is way more exprensive even if i change only one field..
import java.util.concurrent.TimeUnit
import org.openjdk.jmh.annotations._
#State(Scope.Benchmark)
class AddState {
var elem: Int = _
var addSmall: AddSmall = _
var addBig: AddBig = _
#Setup(Level.Trial)
def setup(): Unit = {
addSmall = AddSmall()
addBig = AddBig()
elem = 1
}
}
#OutputTimeUnit(TimeUnit.MILLISECONDS)
#BenchmarkMode(Array(Mode.Throughput))
class SetBenchmark {
#Benchmark
def addSmall(state: AddState): AddSmall = {
state.addSmall.add(state.elem)
}
#Benchmark
def addBig(state: AddState): AddBig = {
state.addBig.add(state.elem)
}
}
And the results show that copying addBig is more than 10 times slower than copying addSmall!
> jmh:run -i 5 -wi 5 -f1 -t1
[info] Benchmark Mode Cnt Score Error Units
[info] LocalBenchmarks.Set.SetBenchmark.addBig thrpt 5 10732.569 ± 349.577 ops/ms
[info] LocalBenchmarks.Set.SetBenchmark.addSmall thrpt 5 126711.722 ± 10538.611 ops/ms
How come copying the object is much slower for addBig?
As far as i understand structural sharing, since all fields are immutable copying the object should be very efficient as it only needs to store the changes ("delta") which in this case is only the set s, and should thus give the same performance as addSmall.
EDIT: The same performance issue arises when the state is part of the case class.
case class AddBig(set: Set[Int] = Set.empty[Int], a: String = "a", b: String = "b", ...) {
def add(e: Int) = copy(set + e)
}
I guess, that this is because AddBig class extends Foo trait, which has all this String fields - a to t. It seems like, in result object they will be declared as regular fields, not the static fields if compare to Java, hence allocating memory for the object, might be the root cause of slower copy performance.
UPDATE:
In order to verify this theory you can try to use JOL (Java Object Layout) tool - openjdk.java.net/projects/code-tools/jol
Here is the simple code example:
import org.openjdk.jol.info.{ClassLayout, GraphLayout}
println(ClassLayout.parseClass(classOf[AddSmall]).toPrintable())
println(ClassLayout.parseClass(classOf[AddBig]).toPrintable())
println(GraphLayout.parseInstance(AddSmall()).toPrintable)
println(GraphLayout.parseInstance(AddBig()).toPrintable)
Which in my case produced next output (short version for answer readability):
xample.AddSmall object internals:
OFFSET SIZE TYPE DESCRIPTION VALUE
0 12 (object header) N/A
12 4 scala.collection.immutable.Set AddSmall.set N/A
Instance size: 16 bytes
Space losses: 0 bytes internal + 0 bytes external = 0 bytes total
example.AddBig object internals:
OFFSET SIZE TYPE DESCRIPTION VALUE
0 12 (object header) N/A
12 4 scala.collection.immutable.Set AddBig.set N/A
16 4 java.lang.String AddBig.a N/A
20 4 java.lang.String AddBig.b N/A
24 4 java.lang.String AddBig.c N/A
Instance size: 96 bytes
Space losses: 0 bytes internal + 0 bytes external = 0 bytes total
example.AddSmall#ea1a8d5d object externals:
ADDRESS SIZE TYPE PATH VALUE
770940b28 16 example.AddSmall (object)
770940b38 470456 (something else) (somewhere else) (something else)
7709b38f0 16 scala.collection.immutable.Set$EmptySet$ .set (object)
example.AddBig#480bdb19d object externals:
ADDRESS SIZE TYPE PATH VALUE
770143658 24 java.lang.String .h (object)
770143670 24 [C .h.value [h]
770143688 15536 (something else) (somewhere else) (something else)
770147338 24 java.lang.String .m (object)
770147350 24 [C .m.value [m]
770147368 1104264 (something else) (somewhere else) (something else)
770254cf0 24 java.lang.String .r (object)
770254d08 24 [C .r.value [r]
770254d20 7140768 (something else) (somewhere else) (something else)
7709242c0 24 java.lang.String .a (object)
So as you can see fields from parent trait become class fields as well, so will be copied along with the object.
Hope this helps!
Have you checked this question?
scala case class copy implementation
You can check compiler generated things to elaborate this. There's a probability that these vals became regular fields of case class and being copied each time class copied.
Your Foo trait adds 20 members to every subclass even though they are constants. This is going to use more memory and make copying the class slower.
Consider
1) Making them def rather than val so they are no longer data members
OR
2) Moving them into the companion class for the trait and accessing as Foo.a etc.

Macro expansion contains free variable

My code compiles with the following error: Macro expansion contains free term variable Hello ...
I have reduced it to minimal example:
class Hello(val hi: String) {
val xx = reify(hi)
var yy = q""
}
def setYYImpl(c: Context)(hExpr: c.Expr[Hello]): c.Expr[Hello] = {
import c.universe._
val hello = c.eval(c.Expr[Hello](c.untypecheck(hExpr.tree.duplicate)))
val xxVal = c.internal.createImporter(u).importTree(hello.xx.tree)
c.Expr(q"""{val h = new Hello("HO"); h.yy=$xxVal; h}""") // it should set `h.yy` to Tree:"HO"
}
def setYY(hExpr: Hello): Hello = macro setYYImpl
setYY(new Hello("HI"))
After inspecting similar question: Can this free-term-variable error (produced at macro expansion) be avoided?
I have come to conclusion that the problem is reify(hi) which refers to the compile time value Hello.hi.
Is it possible to work around this problem? reify(hi) returns Expr Hello.hi, can I somehow remove the Hello. prefix?
Try to replace
val xx = reify(hi)
with
val xx = Literal(Constant(hi))
i.e. build the tree manually (and
.importTree(hello.xx.tree)
with
.importTree(hello.xx)).
(If it's Literal(Constant... only in your example and more complex tree in actual use case, anyway try to build it manually rather than use reify.)
Then you'll have different error
Error: type mismatch;
found : String("HI")
required: reflect.runtime.universe.Tree
setYY(new Hello("HI"))
because your macro returns
Expr[Hello]({
val h = new Hello("HO");
h.yy = "HI"; // h.yy is q"" i.e. Tree, "HI" is String
h
})

process function of DoFn not executing

I am trying to write a beam transform such as
util.py
class GroupIntoBatches(PTransform):
def __init__(self, batch_size):
self.batch_size = batch_size
#staticmethod
def of_size(batch_size):
return GroupIntoBatches(batch_size)
def expand(self, pcoll):
input_coder = coders.registry.get_coder(pcoll)
if not input_coder.is_kv_coder():
raise ValueError(
'coder specified in the input PCollection is not a KvCoder')
key_coder = input_coder.key_coder()
value_coder = input_coder.value_coder()
return pcoll | ParDo(_GroupIntoBatchesDoFn(self.batch_size, key_coder, value_coder))
class _GroupIntoBatchesDoFn(DoFn):
def __init__(self, batch_size, input_key_coder, input_value_coder):
self.batch_size = batch_size
self.batch_spec = BagStateSpec("GroupIntoBatches", input_value_coder)
def process(self, element):
raise Exception("Not getting to this point") # This is not working
print element
Trying to execute this transform via test case
util_test.py
class GroupIntoBatchesTest(unittest.TestCase):
NUM_ELEMENTS = 10
BATCH_SIZE = 5
#staticmethod
def _create_test_data():
scientists = [
"Einstein",
"Darwin",
"Copernicus",
"Pasteur",
"Curie",
"Faraday",
"Newton",
"Bohr",
"Galilei",
"Maxwell"
]
data = []
for i in range(GroupIntoBatchesTest.NUM_ELEMENTS):
index = i % len(scientists)
data.append(("key", scientists[index]))
return data
def test_in_global_window(self):
pipeline = TestPipeline()
collection = pipeline | beam.Create(GroupIntoBatchesTest._create_test_data()) | util.GroupIntoBatches.of_size(GroupIntoBatchesTest.BATCH_SIZE)
My Question is what is the reason the process function is not getting called on my _GroupIntoBatchesDoFn
I get this result on running my test case
test_in_global_window
(apache_beam.transforms.util_test.GroupIntoBatchesTest) ... ok
Your test is constructing the pipeline, but not actually executing it. You need to either write
pipeline = TestPipeline()
collection = pipeline | ...
pipeline.run()
or, alternatively
with TestPipeline() as pipeline:
collection = pipeline | ...
# run is implicitly called on exit of the with block
(You may also be interested in the BatchElements transform.)