Subtracting using minus function in alloy - subtraction

I have created a vending machine it works fine. I want to subtract 1 from the quantity of items once the transaction is completed. I have provided comments in my code for understanding. Ignore some of the comments in pred chocolate. Somehow I am trying to subtract but it just wont. I don't know what seems to be the problem. Any help is appreciated .
sig button {
qty:Int} // buttons on vending machine for selecting chocolates
//sig coin{}
sig choco{
value:Int, // Each chocolate has some cost as an attribute aka value.
choice :one button // Selecting option
}
fact {
// all c:choco | all c1:choco -c | c1.choice != c.choice
}
sig machine{
cust : one customer, // Customer entity
a,b,c,d,nullb ,ip: one button, // buttons on vending machine ,ip is the input selected by user
//oners,twors,fivers ,tenrs,null1: set coin,
ipp,opc2 : one Coin, // ipp = input rs , opc = balance rs
customeripb: cust one -> one ip, // customer presses a button
customeripc: cust one -> one ipp, // customer enters coins
customeropc: opc2 one -> one cust, //customer receives balance of coins
op: one choco , // output of chocolate from machine
customerop: op one -> one cust, // customer receives a chocolate
cadbury, kitkat, eclairs , gum,null: lone choco // types of chocolate
}
{
//#(fivers+tenrs+null+twors+oners) = 5
#(a+b+c+d) = 4 // no of buttons of a b c and d are 4 on machine
# (cadbury+kitkat+ eclairs +gum) =4 // no of options to choose = 4
cadbury=choice.a // cadbury corresponds to button a
cadbury.value= 10 // cadbury costs 10rs
kitkat=choice.b // kitkat corresponds to button b
kitkat.value=5 // kitkat costs 5rs
null.value=0 // null costs 0 rs
null=choice.nullb
// as such null doesnt exist it is just to specify no i/p no o/p and nulb is an imaginary button
eclairs=choice.c // eclairs corresponds to button c
eclairs.value=1 // eclairs costs 1 rs
gum=choice.d // gum corresponds to button d
gum.value=2 // gum costs 1 rs
a.qty>=10 and a.qty<=40
b.qty>=11 and b.qty<=40
c.qty>=12 and c.qty<=40
d.qty>=13 and d.qty<=40
nullb.qty=0
//ip=nullb //input button selection is never nullb(which is imaginary button)
ipp.value!=0 // input of coins is never = 0rs
/* all m:machine|all o:opc2
|all opp: op| all i:ip|all ii:ipp| all c:m.cust
|c -> i in m.customeripb and c->ii in m.customeripc and o->c in m.customerop and opp->c in m.customerop
*/
//button=!=none
}
sig customer //user of machine
{
}
abstract sig Coin { //each coin has a valueof rs
value: Int
}
sig Nullrs extends Coin {} { value = 0 } // void rs
sig Oners extends Coin {} { value = 1 } // one rs
sig Twors extends Coin {} { value = 2 } // twors
sig Fivers extends Coin {}{ value = 5 } // five rs
sig Tenrs extends Coin {} { value = 10 } // ten rs
sig Threers extends Coin {} { value = 3 } // this is only used in o/p to specify 3rs will come out
sig Fourrs extends Coin {} { value = 4 }// this is only used in o/p to specify 4rs will come out
sig Sixrs extends Coin {} { value = 6 }// this is only used in o/p to specify 6rs will come out
sig Sevenrs extends Coin {}{ value = 7 }// this is only used in o/p to specify 7rs will come out
sig Eightrs extends Coin {} { value = 8 } // this is only used in o/p to specify 8rs will come out
sig Niners extends Coin {} { value = 9} //// this is only used in o/p to specify 9rs will come out
pred show{} // show
pred chocolate [before,after:machine ] // machine has two states one before o/p and one after
{
before.cadbury=after.cadbury
before.kitkat=after.kitkat
before.eclairs=after.eclairs
before.gum=after.gum
//all chocolates will not change and are fixed
before.ipp.value=after.ipp.value
// input value of rs remains same i.e i/p is inside machine once inputed so it cant change
before.opc2.value=0 // before state o/p value of balance coins =0
before.op=before.null // beforestate o/p = no chocolate
before.ip!=before.nullb // input button pressed never equals nullb
after.ip!=after.nullb // input button pressed never equals nullb
//before.ip=after.ip // input button pressed remains same
after.op=after.kitkat or after.op=after.eclairs
before.null=after.null // imaginary null chocolate remains in same state
before.opc2!=none and after.opc2 !=none
// balance of coins is never empty in case of empty I have defined nullrs
(after.op.value=before.ipp.value=>after.opc2.value=0)
//
(after.op=after.null=>after.opc2.value=before.ipp.value)
(before.ipp.value > after.op.value=>after.opc2.value=before.ipp.value-after.op.value)
//(before.ipp.value=after.op.value=>after.opc2.value=0)
//opc2.value!=ipp.value
before.ip=before.a or before.ip=before.b or before.ip=before.c or before.ip=before.d
(after.op=after.cadbury ) => ( ( after.ip=after.a and after.a.qty=minus[before.a.qty,1])) else
(after.op=after.kitkat ) => ( (after.ip=after.b and after.b.qty=minus[before.b.qty, 1])) else
(after.op=after.eclairs ) =>( (after.ip=after.c and after.c.qty=minus[before.c.qty,1])) else
(after.op=after.gum ) =>((after.ip=after.d and after.d.qty=minus[before.d.qty,1])) else
(after.ip=before.ip and after.ip.qty=minus[before.ip.qty,0] )
after.op!=before.null => after.op.choice=before.ip
(after.op=before.gum=>before.ipp.value>=Twors.value)
after.op=before.cadbury=>before.ipp.value>=Tenrs.value
after.op=before.eclairs=>before.ipp.value>=Oners.value
after.op=before.kitkat=>before.ipp.value>=Fivers.value
(before.ipp=Oners or before.ipp=Twors or before.ipp=Fivers or before.ipp=Tenrs or before.ipp=Nullrs) and
before.ipp!=Threers and before.ipp!=Fourrs and before.ipp !=Sixrs and before.ipp!=Sevenrs and before.ipp!=Eightrs and before.ipp!=Niners
(before.ip=before.b and before.ipp.value < 5) => (after.op!=before.kitkat or after.op!=before.eclairs or after.op!=before.cadbury or after.op!=before.gum)and after.op=before.null
(before.ip=before.d and before.ipp.value < 2) => (after.op!=before.kitkat or after.op!=before.eclairs or after.op!=before.cadbury or after.op!=before.gum)and after.op=before.null
(before.ip=before.a and before.ipp.value < 10 )=> (after.op!=before.kitkat or after.op!=before.eclairs or after.op!=before.cadbury or after.op!=before.gum) and after.op=before.null
(before.ip=before.c and before.ipp.value >= 1) => (after.op!=before.kitkat or after.op!=before.null or after.op!=before.cadbury or after.op!=before.gum) and after.op=before.eclairs
(before.ip=before.c and before.ipp.value = 0) => (after.op!=before.kitkat or after.op!=before.null or after.op!=before.cadbury or after.op!=before.gum) and after.op=before.null
(before.ip=before.a and before.ipp.value =10) => (after.op!=before.kitkat or after.op!=before.null or after.op!=before.eclairs or after.op!=before.gum) and after.op= before.cadbury
(before.ip=before.d and before.ipp.value >= 2) => (after.op!=before.kitkat or after.op!=before.null or after.op!=before.cadbury or after.op!=before.eclairs) and after.op=before.gum
(before.ip=before.b and before.ipp.value >= 5) => (after.op!=before.eclairs or after.op!=before.null or after.op!=before.cadbury or after.op!=before.gum) and after.op=before.kitkat
}
run chocolate for exactly 2 machine, 8 button, 5 choco,9 Int,5 Coin,1 customer

In general, it would make sense to be more specific than "it doesn't work".
I'm assuming that what you mean by "it doesn't work" is that in the after machine you expect the quantity of the selected chocolate to be decreased by 1, but instead, it stays the same. The reason for that is your (if-then-else) or (if-then-else) or ... expression, which is logically flawed. What you probably wanted to express is to enforce at least one then branch (because you know that exactly one if condition will be satisfied), but that is not necessary to satisfy this whole disjunction.
More concretely, in
((after.op=after.cadbury)
=> (... and after.a.qty=minus[before.a.qty,1] and ...)
else (... and after.a.qty=before.a.qty and ...)
)
or
((after.op=after.kitkat)
=> (... and after.b.qty=minus[before.b.qty,1] and ...)
else (... and after.b.qty=before.b.qty and ...)
)
even if after.op is equal to after.cadbury, that does not enforce the then branch of that clause to be true, because to satisfy this whole expression, it is enough to satisfy the else branch of the next clause, which says that all quantities should stay the same.
What you want is some soft of if-then-elsif-...-else construct, e.g.,
(after.op = after.cadbury) => {
...
} else (after.op = after.kitkat) => {
...
} else {
...
}
If you do that, your machine will still not work, that is, your model will become unsatisfiable: your constraints enforce that both after and before machines share the same buttons1 and quantities are associated with buttons (the qty field is in the button sig), which means that quantities must be the same in both after and before machines. I don't really see any good reason to put qty in sig button.
[1]: by having before.cadbury=after.cadbury and ... in your chocolate predicate, and cadbury=choice.a and ... in your appended facts for sig machine

Related

When to use keyword return in Scala

I learned early on that there is no reason to use the return keyword in Scala (as far as I'm aware). That being said I found an example where simply changing adding the return keyword made my function work, where it previously didn't.
The code in question comes from my solution to the Advent of Code day 7 challenge.
def containsShinyGoldBag(bagContents: Map[String, List[String]], currentBag: String): Boolean = {
val contents = bagContents(currentBag)
if (bagContents(currentBag).contains("shiny gold") ) {
// Base Case: Bag Found in list of bags
true
} else if (contents == List.empty){
// Base Case: Dead End
false
} else {
// Continue searching down list
// Ideal solution ( gives same result as the working solution without return keyword )
// for (b <- contents) containsShinyGoldBag(bagContents, b)
// Working solution
for (b <- contents) {
if (containsShinyGoldBag(bagContents, b)) {
println(s"Found one! $b inside a $currentBag")
return true // <--- culprit
}
else false
}
false
}
}
// In the main function
var count = 0
for (bag <- bagContents.keys) {
if (containsShinyGoldBag(bagContents, bag)) {
count = count + 1
}
}
println(s"There are $count way to bring a shiny gold bag!")
When I run the code without return I end up with count = 7, which is the number of bags directly containing a shiny gold bag, rather than the correct number which counts bags that contain a shiny gold bag somewhere inside of one of their other bags down the line.
A function returns the value of the last expression it evaluates; in your case that will be one of:
true after if (bagContents(currentBag).contains("shiny gold") );
false after else if (contents == List.empty);
the last false.
true is not in such a position, so you need return to, well, make the function return it. Otherwise it's evaluated and ignored because you don't do anything with it. So is else false in the same for, actually, it can be removed without changing the meaning.
The alternative to avoid return here is
contents.exists(b => containsShinyGoldBag(bagContents, b))

Can anyone explain interesting spin-lock behavior?

Given the following code
case class Score(value: BigInt, random: Long = randomLong) extends Comparable[Score] {
override def compareTo(that: Score): Int = {
if (this.value < that.value) -1
else if (this.value > that.value) 1
else if (this.random < that.random) -1
else if (this.random > that.random) 1
else 0
}
override def equals(obj: _root_.scala.Any): Boolean = {
val that = obj.asInstanceOf[Score]
this.value == that.value && this.random == that.random
}
}
#tailrec
private def update(mode: UpdateMode, member: String, newScore: Score, spinCount: Int, spinStart: Long): Unit = {
// Caution: there is some subtle logic below, so don't modify it unless you grok it
try {
Metrics.checkSpinCount(member, spinCount)
} catch {
case cause: ConcurrentModificationException =>
throw new ConcurrentModificationException(Leaderboard.maximumSpinCountExceeded.format("update", member), cause)
}
// Set the spin-lock
put(member, None) match {
case None =>
// BEGIN CRITICAL SECTION
// Member's first time on the board
if (scoreToMember.put(newScore, member) != null) {
val message = s"$member: added new member in memberToScore, but found old member in scoreToMember"
logger.error(message)
throw new ConcurrentModificationException(message)
}
memberToScore.put(member, Some(newScore)) // remove the spin-lock
// END CRITICAL SECTION
case Some(option) => option match {
case None => // Update in progress, so spin until complete
//logger.debug(s"update: $member locked, spinCount = $spinCount")
for (i <- -1 to spinCount * 2) {Thread.`yield`()} // dampen contention
update(mode, member, newScore, spinCount + 1, spinStart)
case Some(oldScore) =>
// BEGIN CRITICAL SECTION
// Member already on the leaderboard
if (scoreToMember.remove(oldScore) == null) {
val message = s"$member: oldScore not found in scoreToMember, concurrency defect"
logger.error(message)
throw new ConcurrentModificationException(message)
} else {
val score =
mode match {
case Replace =>
//logger.debug(s"$member: newScore = $newScore")
newScore
case Increment =>
//logger.debug(s"$member: newScore = $newScore, oldScore = $oldScore")
Score(newScore.value + oldScore.value)
}
//logger.debug(s"$member: updated score = $score")
scoreToMember.put(score, member)
memberToScore.put(member, Some(score)) // remove the spin-lock
//logger.debug(s"update: $member unlocked")
}
// END CRITICAL SECTION
// Do this outside the critical section to reduce time under lock
if (spinCount > 0) Metrics.checkSpinTime(System.nanoTime() - spinStart)
}
}
}
There are two important data structures: memberToScore and scoreToMember. I have experimented using both TrieMap[String,Option[Score]] and ConcurrentHashMap[String,Option[Score]] for memberToScore and both have the same behavior.
So far my testing indicates the code is correct and thread safe, but the mystery is the performance of the spin-lock. On a system with 12 hardware threads, and 1000 iterations on 12 Futures: hitting the same member all the time results in spin cycles of 50 or more, but hitting a random distribution of members can result in spin cycles of 100 or more. The behavior gets worse if I don't dampen the spin without iterating over yield() calls.
So, this seems counter intuitive, I was expecting the random distribution of keys to result in less spin than the same key, but testing proves otherwise.
Can anyone offer some insight into this counter-intuitive behavior?
Granted there may be better solutions to my design, and I am open to them, but for now I cannot seem to find a satisfactory explanation for what my tests are showing, and my curiosity leaves me hungry.
As an aside, while the single member test has a lower ceiling for the spin count, the random member test has a lower ceiling for time spinning, which is what I would expect. I just cannot explain why the random member test generally produces a higher ceiling for spin count.

SCALA multiple nested case statement in map RDD operations

I have an RDD with 7 string elements
linesData: org.apache.spark.rdd.RDD[(String, String, String, String, String, String, String)]
I need to figure out for each record the following 2 items: Status and CSrange.
the logic is something like:
case when a(6)=0
if Date(a(5)) > Date(a(4)) then
if Date(a(5)) - Date(a(4)) > 60 days then
staus = '60+'
else
status = 'Curr'
endif
else
status = 'Curr'
end
when (a(6) >=1 and a(6) <=3 ) then
Status = 'In-Forcl'
when (a(6) >=4 and a(6) <=8)
Status = 'Forclosed'
else
Status = 'Unknown'
end case
case when (a(1) <640 and a(1) >0 ) then CSrange = '<640'
when (a(1) <650 and a(1)> 579 then CSrange = '640-649'
when (a(1) <660 and a(1)> 619 then CSrange = '650-659'
when (a(1) <680 and a(1)> 639 then CSrange = '640-649'
when (a(1) >789 then CSrange = '680+'
else
CSRange ='Unknown'
end case
At this point, I would like to write the data out to disk, with probably 9 elements insteatd of 7. (later I will need to do calculate the rate of each of the statuses above by various elements).
My first problems are:
1. how to do date arithmetics? as I need to stay at the RDD level (no data frames).
2. I don't know how to do the CASE statements in SCALA.
SAMPLE DATA:
(2017_7_0555,794,Scott,CORNERSTONE,8/1/2017,8/1/2017,0)
(2017_7_0557,682,Hennepin,LAKE AREA MT,9/1/2017,8/1/2017,0)
(2017_7_0565,754,Ramsey,GUARANTEED R,6/1/2017,8/1/2017,0)
(2017_7_0570,645,Hennepin,FAIRWAY INDE,2/1/2015,8/1/2017,5)
(2017_7_0574,732,Wright,GUARANTEED R,7/1/2017,8/1/2017,0)
(2017_7_0575,789,Hennepin,GUARANTEED R,8/1/2017,8/1/2017,0)
(2017_7_0577,662,Hennepin,MIDCOUNTRY,8/1/2017,8/1/2017,0)
(2017_7_4550,642,Mower,WELLS FARGO,5/1/2017,8/1/2017,0)
(2017_7_4574,689,Hennepin,RIVER CITY,8/1/2017,8/1/2017,0)
(2017_7_4584,662,Hennepin,WELLS FARGO,8/1/2017,8/1/2017,0)
(2017_7_4600,719,Ramsey,PHH HOME LOA,5/1/2017,8/1/2017,0)
I would create a case class of seven fields and map RDD[(,...)] to RDD[MyClass] and cast to appropriate types. I recommend you JodaTime date library for dates. This will make your code more descriptive.
Then in a map extract status and range into two functions:
myRDD.map(myInstance => (getStatus(myInstance), getRange(myInstance)))
def getStatus(myInstance: MyClass) : String = {
val (_,_,_,_,date4,date5,field6,_) = MyClass.unapply(myInstance).get
field6 match {
case 0 => {
if(date5.isAfter(date4) {
if(date4.plusDays(60).isAfter(date5)){
"60+"
} else {
"Curr"
}
}
}
case x if (x >= 1 && x <= 3) => "Forclosed"
....
}
}
Note:
I haven't tested the code.
Rename variables from the example.
I show you an example about how to manage JodaTime dates, and scala pattern matching. You have to finish the function and define the other one.

Confusion over behavior of Publish().Refcount()

I've got a simple program here that displays the number of letters in various words. It works as expected.
static void Main(string[] args) {
var word = new Subject<string>();
var wordPub = word.Publish().RefCount();
var length = word.Select(i => i.Length);
var report =
wordPub
.GroupJoin(length,
s => wordPub,
s => Observable.Empty<int>(),
(w, a) => new { Word = w, Lengths = a })
.SelectMany(i => i.Lengths.Select(j => new { Word = i.Word, Length = j }));
report.Subscribe(i => Console.WriteLine($"{i.Word} {i.Length}"));
word.OnNext("Apple");
word.OnNext("Banana");
word.OnNext("Cat");
word.OnNext("Donkey");
word.OnNext("Elephant");
word.OnNext("Zebra");
Console.ReadLine();
}
And the output is:
Apple 5
Banana 6
Cat 3
Donkey 6
Elephant 8
Zebra 5
I used the Publish().RefCount() because "wordpub" is included in "report" twice. Without it, when a word is emitted first one part of the report would get notified by a callback, and then the other part of report would be notified, double the notifications. That is kindof what happens; the output ends up having 11 items rather than 6. At least that is what I think is going on. I think of using Publish().RefCount() in this situation as simultaneously updating both parts of the report.
However if I change the length function to ALSO use the published source like this:
var length = wordPub.Select(i => i.Length);
Then the output is this:
Apple 5
Apple 6
Banana 6
Cat 3
Banana 3
Cat 6
Donkey 6
Elephant 8
Donkey 8
Elephant 5
Zebra 5
Why can't the length function also use the same published source?
This was a great challenge to solve!
So subtle the conditions that this happens.
Apologies in advance for the long explanation, but bear with me!
TL;DR
Subscriptions to the published source are processed in order, but before any other subscription directly to the unpublished source. i.e. you can jump the queue!
With GroupJoin subscription order is important to determine when windows open and close.
My first concern would be that you are publish refcounting a subject.
This should be a no-op.
Subject<T> has no subscription cost.
So when you remove the Publish().RefCount() :
var word = new Subject<string>();
var wordPub = word;//.Publish().RefCount();
var length = word.Select(i => i.Length);
then you get the same issue.
So then I look to the GroupJoin (because my intuition suggests that Publish().Refcount() is a red herring).
For me, eyeballing this alone was too hard to rationalise, so I lean on a simple debugging too I have used dozens of times of the years - a Trace or Log extension method.
public interface ILogger
{
void Log(string input);
}
public class DumpLogger : ILogger
{
public void Log(string input)
{
//LinqPad `Dump()` extension method.
// Could use Console.Write instead.
input.Dump();
}
}
public static class ObservableLoggingExtensions
{
private static int _index = 0;
public static IObservable<T> Log<T>(this IObservable<T> source, ILogger logger, string name)
{
return Observable.Create<T>(o =>
{
var index = Interlocked.Increment(ref _index);
var label = $"{index:0000}{name}";
logger.Log($"{label}.Subscribe()");
var disposed = Disposable.Create(() => logger.Log($"{label}.Dispose()"));
var subscription = source
.Do(
x => logger.Log($"{label}.OnNext({x.ToString()})"),
ex => logger.Log($"{label}.OnError({ex})"),
() => logger.Log($"{label}.OnCompleted()")
)
.Subscribe(o);
return new CompositeDisposable(subscription, disposed);
});
}
}
When I add the logging to your provided code it looks like this:
var logger = new DumpLogger();
var word = new Subject<string>();
var wordPub = word.Publish().RefCount();
var length = word.Select(i => i.Length);
var report =
wordPub.Log(logger, "lhs")
.GroupJoin(word.Select(i => i.Length).Log(logger, "rhs"),
s => wordPub.Log(logger, "lhsDuration"),
s => Observable.Empty<int>().Log(logger, "rhsDuration"),
(w, a) => new { Word = w, Lengths = a })
.SelectMany(i => i.Lengths.Select(j => new { Word = i.Word, Length = j }));
report.Subscribe(i => ($"{i.Word} {i.Length}").Dump("OnNext"));
word.OnNext("Apple");
word.OnNext("Banana");
word.OnNext("Cat");
word.OnNext("Donkey");
word.OnNext("Elephant");
word.OnNext("Zebra");
This will then output in my log something like the following
Log with Publish().RefCount() used
0001lhs.Subscribe()
0002rhs.Subscribe()
0001lhs.OnNext(Apple)
0003lhsDuration.Subscribe()
0002rhs.OnNext(5)
0004rhsDuration.Subscribe()
0004rhsDuration.OnCompleted()
0004rhsDuration.Dispose()
OnNext
Apple 5
0001lhs.OnNext(Banana)
0005lhsDuration.Subscribe()
0003lhsDuration.OnNext(Banana)
0003lhsDuration.Dispose()
0002rhs.OnNext(6)
0006rhsDuration.Subscribe()
0006rhsDuration.OnCompleted()
0006rhsDuration.Dispose()
OnNext
Banana 6
...
However when I remove the usage Publish().RefCount() the new log output is as follows:
Log without only Subject
0001lhs.Subscribe()
0002rhs.Subscribe()
0001lhs.OnNext(Apple)
0003lhsDuration.Subscribe()
0002rhs.OnNext(5)
0004rhsDuration.Subscribe()
0004rhsDuration.OnCompleted()
0004rhsDuration.Dispose()
OnNext
Apple 5
0001lhs.OnNext(Banana)
0005lhsDuration.Subscribe()
0002rhs.OnNext(6)
0006rhsDuration.Subscribe()
0006rhsDuration.OnCompleted()
0006rhsDuration.Dispose()
OnNext
Apple 6
OnNext
Banana 6
0003lhsDuration.OnNext(Banana)
0003lhsDuration.Dispose()
...
This gives us some insight, however when the issue really becomes clear is when we start annotating our logs with a logical list of subscriptions.
In the original (working) code with the RefCount our annotations might look like this
//word.Subsribers.Add(wordPub)
0001lhs.Subscribe() //wordPub.Subsribers.Add(0001lhs)
0002rhs.Subscribe() //word.Subsribers.Add(0002rhs)
0001lhs.OnNext(Apple)
0003lhsDuration.Subscribe() //wordPub.Subsribers.Add(0003lhsDuration)
0002rhs.OnNext(5)
0004rhsDuration.Subscribe()
0004rhsDuration.OnCompleted()
0004rhsDuration.Dispose()
OnNext
Apple 5
0001lhs.OnNext(Banana)
0005lhsDuration.Subscribe() //wordPub.Subsribers.Add(0005lhsDuration)
0003lhsDuration.OnNext(Banana)
0003lhsDuration.Dispose() //wordPub.Subsribers.Remove(0003lhsDuration)
0002rhs.OnNext(6)
0006rhsDuration.Subscribe()
0006rhsDuration.OnCompleted()
0006rhsDuration.Dispose()
OnNext
Banana 6
So in this example, when word.OnNext("Banana"); is executed the chain of observers is linked in this order
wordPub
0002rhs
However, wordPub has child subscriptions!
So the real subscription list looks like
wordPub
0001lhs
0003lhsDuration
0005lhsDuration
0002rhs
If we annotate the Subject only log we see where the subtlety lies
0001lhs.Subscribe() //word.Subsribers.Add(0001lhs)
0002rhs.Subscribe() //word.Subsribers.Add(0002rhs)
0001lhs.OnNext(Apple)
0003lhsDuration.Subscribe() //word.Subsribers.Add(0003lhsDuration)
0002rhs.OnNext(5)
0004rhsDuration.Subscribe()
0004rhsDuration.OnCompleted()
0004rhsDuration.Dispose()
OnNext
Apple 5
0001lhs.OnNext(Banana)
0005lhsDuration.Subscribe() //word.Subsribers.Add(0005lhsDuration)
0002rhs.OnNext(6)
0006rhsDuration.Subscribe()
0006rhsDuration.OnCompleted()
0006rhsDuration.Dispose()
OnNext
Apple 6
OnNext
Banana 6
0003lhsDuration.OnNext(Banana)
0003lhsDuration.Dispose()
So in this example, when word.OnNext("Banana"); is executed the chain of observers is linked in this order
1. 0001lhs
2. 0002rhs
3. 0003lhsDuration
4. 0005lhsDuration
As the 0003lhsDuration subscription is activated after the 0002rhs, it wont see the "Banana" value to terminate the window, until after the rhs has been sent the value, thus yielding it in the still open window.
Whew
As #francezu13k50 points out the obvious and simple solution to your problem is to just use word.Select(x => new { Word = x, Length = x.Length });, but as I think you have given us a simplified version of your real problem (appreciated) I understand why this isn't suitable.
However, as I dont know what your real problem space is I am not sure what to suggest to you to provide a solution, except that you have one with your current code, and now you should know why it works the way it does.
RefCount returns an Observable that stays connected to the source as long as there is at least one subscription to the returned Observable. When the last subscription is disposed, RefCount disposes it's connection to the source, and reconnects when a new subscription is being made. It might be the case with your report query that all subscriptions to the 'wordPub' are disposed before the query is fulfilled.
Instead of the complicated GroupJoin query you could simply do :
var report = word.Select(x => new { Word = x, Length = x.Length });
Edit:
Change your report query to this if you want to use the GroupJoin operator :
var report =
wordPub
.GroupJoin(length,
s => wordPub,
s => Observable.Empty<int>(),
(w, a) => new { Word = w, Lengths = a })
.SelectMany(i => i.Lengths.FirstAsync().Select(j => new { Word = i.Word, Length = j }));
Because GroupJoin seems to be very tricky to work with, here is another approach for correlating the inputs and outputs of functions.
static void Main(string[] args) {
var word = new Subject<string>();
var length = new Subject<int>();
var report =
word
.CombineLatest(length, (w, l) => new { Word = w, Length = l })
.Scan((a, b) => new { Word = b.Word, Length = a.Word == b.Word ? b.Length : -1 })
.Where(i => i.Length != -1);
report.Subscribe(i => Console.WriteLine($"{i.Word} {i.Length}"));
word.OnNext("Apple"); length.OnNext(5);
word.OnNext("Banana");
word.OnNext("Cat"); length.OnNext(3);
word.OnNext("Donkey");
word.OnNext("Elephant"); length.OnNext(8);
word.OnNext("Zebra"); length.OnNext(5);
Console.ReadLine();
}
This approach works if every input has 0 or more outputs subject to the constraints that (1) outputs only arrive in the same order as the inputs AND (2) each output corresponds to its most recent input. This is like a LeftJoin - each item in the first list (word) is paired with items in the right list (length) that subsequently arrive, up until another item in the first list is emitted.
Trying to use regular Join instead of GroupJoin. I thought the problem was that when a new word was created there was a race condition inside Join between creating a new window and ending the current one. So here I tried to elimate that by pairing every word with a null signifying the end of the window. Doesn't work, just like the first version did not. How is it possible that a new window is created for each word without the previous one being closed first? Completely confused.
static void Main(string[] args) {
var lgr = new DelegateLogger(Console.WriteLine);
var word = new Subject<string>();
var wordDelimited =
word
.Select(i => Observable.Return<string>(null).StartWith(i))
.SelectMany(i => i);
var wordStart = wordDelimited.Where(i => i != null);
var wordEnd = wordDelimited.Where(i => i == null);
var report = Observable
.Join(
wordStart.Log(lgr, "word"), // starts window
wordStart.Select(i => i.Length),
s => wordEnd.Log(lgr, "expireWord"), // ends current window
s => Observable.Empty<int>(),
(l, r) => new { Word = l, Length = r });
report.Subscribe(i => Console.WriteLine($"{i.Word} {i.Length}"));
word.OnNext("Apple");
word.OnNext("Banana");
word.OnNext("Cat");
word.OnNext("Zebra");
word.OnNext("Elephant");
word.OnNext("Bear");
Console.ReadLine();
}

How to only emit consistent calculations?

I'm using reactive programming to do a bunch of calculations. Here is a simple example that tracks two numbers and their sum:
static void Main(string[] args) {
BehaviorSubject<int> x = new BehaviorSubject<int>(1);
BehaviorSubject<int> y = new BehaviorSubject<int>(2);
var sum = Observable.CombineLatest(x, y, (num1, num2) => num1 + num2);
Observable
.CombineLatest(x, y, sum, (xx, yy, sumsum) => new { X = xx, Y = yy, Sum = sumsum })
.Subscribe(i => Console.WriteLine($"X:{i.X} Y:{i.Y} Sum:{i.Sum}"));
x.OnNext(3);
Console.ReadLine();
}
This generates the following output:
X:1 Y:2 Sum:3
X:3 Y:2 Sum:3
X:3 Y:2 Sum:5
Notice how second output result is "incorrect" because it is showing that 3+2=3. I understand why this is happening (x is updated before the sum is updated) but I want my output calculations to be atomic/consistent - no value should be emitted until all dependent calculations are complete. My first approach was this...
Observable.When(sum.And(Observable.CombineLatest(x, y)).Then((s, xy) => new { Sum = s, X = xy[0], Y = xy[1] } ));
This seems to work for my simple example. But my actual code has LOTS of calculated values and I couldn't figure out how to scale it. For example, if there was a sum and squaredSum, I don't know how to wait for each of these to emit something before taking action.
One method that should work (in-theory) is to timestamp all the values I care about, as shown below.
Observable
.CombineLatest(x.Timestamp(), y.Timestamp(), sum.Timestamp(), (xx, yy, sumsum) => new { X = xx, Y = yy, Sum = sumsum })
.Where(i=>i.Sum.Timestamp>i.X.Timestamp && i.Sum.Timestamp>i.Y.Timestamp)
// do the calculation and subscribe
This method could work for very complicated models. All I have to do is ensure that no calculated value is emitted that is older than any core data value. I find this to be a bit of a kludge. It didn't actually work in my console app. When I replaced Timestamp with a custom extension that assigned a sequential int64 it did work.
What is a simple, clean way to handle this kind of thing in general?
=======
I'm making some progress here. This waits for a sum and sumSquared to emit a value before grabbing the data values that triggered the calculation.
var all = Observable.When(sum.And(sumSquared).And(Observable.CombineLatest(x, y)).Then((s, q, data)
=> new { Sum = s, SumSquared = q, X = data[0], Y = data[1] }));
This should do what you want:
Observable.CombineLatest(x, y, sum)
.DistinctUntilChanged(list => list[2])
.Subscribe(list => Console.WriteLine("{0}+{1}={2}", list[0], list[1], list[2]));
It waits until the sum has been updated, which means that all its sources must have been updated too.
You problem isn't because x is updated before the sum is updated per se. It's really about the way that you've constructed your query.
You've effectively created two queries: Observable.CombineLatest(x, y, (num1, num2) => num1 + num2) & Observable.CombineLatest(x, y, sum, (xx, yy, sumsum) => new { X = xx, Y = yy, Sum = sumsum }). Since in each you're subscribing to x then you've create two subscriptions. Meaning that when x updates then two lots of updates occur.
You need to avoid creating two subscriptions.
If you write your code like this:
BehaviorSubject<int> x = new BehaviorSubject<int>(1);
BehaviorSubject<int> y = new BehaviorSubject<int>(2);
Observable
.CombineLatest(x, y, (num1, num2) => new
{
X = num1,
Y = num2,
Sum = num1 + num2
})
.Subscribe(i => Console.WriteLine($"X:{i.X} Y:{i.Y} Sum:{i.Sum}"));
x.OnNext(3);
...then you correctly get this output:
X:1 Y:2 Sum:3
X:3 Y:2 Sum:5
I've started to get my head around this some more. Here is a more detailed example of what I'm trying to accomplish. This is some code that validates a first and last name, and should only generate a whole name when both parts are valid. As you can see I'm trying to use a bunch of small independently defined functions, like "firstIsValid", and then compose them together to calculate something more complex.
It seems like the challenge I'm facing here is trying to correlate inputs and outputs in my functions. For example, "firstIsValid" generates an output that says some first name was valid, but doesn't tell you which one. In option 2 below, I'm able to correlate them using Zip.
This strategy won't work if a validation function does not generate one output for each input. For example, if the user is typing web addresses and we're trying to validate them on the web, maybe we'd do a Throttle and/or Switch. There might be 10 web addresses for a single "webAddressIsValid". In that situation, I think I have to include the output with the input. Maybe have an IObservable> where the string is the web address and the bool is whether it is valid or not.
static void Main(string[] args) {
var first = new BehaviorSubject<string>(null);
var last = new BehaviorSubject<string>(null);
var firstIsValid = first.Select(i => string.IsNullOrEmpty(i) || i.Length < 3 ? false : true);
var lastIsValid = last.Select(i => string.IsNullOrEmpty(i) || i.Length < 3 ? false : true);
// OPTION 1 : Does not work
// Output: bob smith, bob, bob roberts, roberts
// firstIsValid and lastIsValid are not in sync with first and last
//var whole = Observable
// .CombineLatest(first, firstIsValid, last, lastIsValid, (f, fv, l, lv) => new {
// First = f,
// Last = l,
// FirstIsValid = fv,
// LastIsValid = lv
// })
// .Where(i => i.FirstIsValid && i.LastIsValid)
// .Select(i => $"{i.First} {i.Last}");
// OPTION 2 : Works as long as every change in a core data value generates one calculated value
// Output: bob smith, bob robert
var firstValidity = Observable.Zip(first, firstIsValid, (f, fv) => new { Name = f, IsValid = fv });
var lastValidity = Observable.Zip(last, lastIsValid, (l, lv) => new { Name = l, IsValid = lv });
var whole =
Observable.CombineLatest(firstValidity, lastValidity, (f, l) => new { First = f, Last = l })
.Where(i => i.First.IsValid && i.Last.IsValid)
.Select(i => $"{i.First.Name} {i.Last.Name}");
whole.Subscribe(i => Console.WriteLine(i));
first.OnNext("bob");
last.OnNext("smith");
last.OnNext(null);
last.OnNext("roberts");
first.OnNext(null);
Console.ReadLine();
}
Another approach here. Each value gets a version number (like a timestamp). Any time a calculated value is older than the data (or other calculated values it relies upon) we can ignore it.
public class VersionedValue {
static long _version;
public VersionedValue() { Version = Interlocked.Increment(ref _version); }
public long Version { get; }
}
public class VersionedValue<T> : VersionedValue {
public VersionedValue(T value) { Value = value; }
public T Value { get; }
public override string ToString() => $"{Value} {Version}";
}
public static class ExtensionMethods {
public static IObservable<VersionedValue<T>> Versioned<T>(this IObservable<T> values) => values.Select(i => new VersionedValue<T>(i));
public static VersionedValue<T> AsVersionedValue<T>(this T obj) => new VersionedValue<T>(obj);
}
static void Main(string[] args) {
// same as before
//
var whole = Observable
.CombineLatest(first.Versioned(), firstIsValid.Versioned(), last.Versioned(), lastIsValid.Versioned(), (f, fv, l, lv) => new {
First = f,
Last = l,
FirstIsValid = fv,
LastIsValid = lv
})
.Where(i => i.FirstIsValid.Version > i.First.Version && i.LastIsValid.Version > i.Last.Version)
.Where(i => i.FirstIsValid.Value && i.LastIsValid.Value)
.Select(i => $"{i.First.Value} {i.Last.Value}");