Unable to create MarkLogic scheduled tasks from within CPF action module - scheduled-tasks

I have a MarkLogic database with Content Processing Framework (CPF) installed and the CPF pipeline is such that:
Whenever a document is inserted, it grabs the value of execution-date from the document and schedules a task for that time.
Example:
Sample document:
<sample>
<execution-date>2014-10-20T12:29:10</execution-date>
</sample>
when inserted triggers the CPF action module which reads the value of execution-date field and creates a scheduled task to be executed on the time read from execution-date field.
Following is the XQuery code snippet from the CPF action module that creates the scheduled task:
(: Load the document whose URI CPF handed to this action module. :)
let $doc := fn:doc( $cpf:document-uri )
(: Text of sample/execution-date; cast to xs:dateTime below and used as the task's start time. :)
let $releasedon := xs:string($doc/sample/execution-date/text())
(: NOTE(review): get-configuration ... save-configuration is a read-modify-write and no lock
   is taken in this snippet, so concurrent runs can each start from the same configuration. :)
let $config := admin:get-configuration()
let $group := admin:group-get-id($config, "Default")
(: Describe a one-time task running /tasks/task.xqy at the requested time. :)
let $new-task :=
admin:group-one-time-scheduled-task(
"/tasks/task.xqy",
"/",
xs:dateTime($releasedon),
xdmp:database("SampleDB"),
xdmp:database("Modules"),
xdmp:user("admin"),
(),
"normal")
(: Returns a new configuration value containing the task; nothing is persisted until save-configuration. :)
let $addTask := admin:group-add-scheduled-task($config,$group, $new-task)
return
admin:save-configuration($addTask),
(: The comma ends the FLWOR expression: the log call is a separate sequence item evaluated after it. :)
xdmp:log(fn:concat("Task for document Uri: ", $cpf:document-uri, " created"))
Now, when I insert single document then everything works as expected, that is:
Document inserted successfully
the CPF action module is triggered successfully
Scheduled task created successfully.
But, when I try to insert multiple documents using:
xdmp:document-insert("/1.xml",
<sample>
<execution-date>2014-10-21T10:00:00</execution-date>
</sample>,
xdmp:default-permissions(),
("documents"))
,
xdmp:document-insert("/2.xml",
<sample>
<execution-date>2014-10-20T11:00:00</execution-date>
</sample>,
xdmp:default-permissions(),
("documents"))
CPF action module gets triggered successfully (log message can be seen in logs) BUT
ONLY one scheduled task gets created.
When looking in MarkLogic Admin Interface I can only find a single scheduled task which is scheduled to run at 2014-10-20T11:00:00
Please let me know what am I doing wrong or is there any configuration I am missing.
Any suggestions are welcomed.
Thanks!

The fundamental issue here is that the admin configuration manipulation APIs are not transactionally protected operations, so when you run two in parallel, each one sees the initial state of the configuration files, writes its bit to add the scheduled task, and saves it, and only one of them wins. You can force this to behave in a transactionally protected way by forcing a lock on some URI. It doesn't matter what it is. It doesn't even have to be in the database. As long as everything that is doing this is locking on the same URI, you are fine. xdmp:lock-for-update("my.example.uri") will do this.

The following CPF action module is now working as expected:
xquery version "1.0-ml";
import module namespace cpf = "http://marklogic.com/cpf" at "/MarkLogic/cpf/cpf.xqy";
import module namespace admin = "http://marklogic.com/xdmp/admin" at "/MarkLogic/admin.xqy";
declare variable $cpf:document-uri as xs:string external;
declare variable $cpf:transition as node() external;
(:~
 : CPF action: schedules a one-time task for the document being processed.
 :
 : Takes an update lock on a fixed URI first, then (if the CPF transition is
 : still valid) reads sample/execution-date from the document, adds a one-time
 : scheduled task for that time to the "Default" group and saves the admin
 : configuration. Any error raised inside the try block is reported through
 : cpf:failure.
 :
 : NOTE(review): no cpf:success call is visible on the success path; confirm
 : the pipeline does not rely on one.
 :)
declare function local:scheduleTask()
{
(: Every run locks the same URI, so the configuration read-modify-write below is serialized
   across concurrent invocations. The URI is only used as a lock name here. :)
xdmp:lock-for-update("/sample.xml"),
if (cpf:check-transition($cpf:document-uri,$cpf:transition)) then try
{
let $doc := fn:doc( $cpf:document-uri )
(: Text of sample/execution-date; cast to xs:dateTime below and used as the task's start time. :)
let $releasedon := xs:string($doc/sample/execution-date/text())
let $config := admin:get-configuration()
let $group := admin:group-get-id($config, "Default")
let $new-task :=
admin:group-one-time-scheduled-task(
"/tasks/task.xqy",
"/",
xs:dateTime($releasedon),
xdmp:database("SampleDB"),
xdmp:database("Modules"),
xdmp:user("admin"),
(),
"normal")
(: Produces a new configuration value; it is persisted only by save-configuration below. :)
let $addTask := admin:group-add-scheduled-task($config,$group, $new-task)
return
admin:save-configuration($addTask),
(: The comma ends the FLWOR expression; the log call is the next item of the try block's sequence. :)
xdmp:log(fn:concat("Task for document Uri: ", $cpf:document-uri, " created"))
}
catch ($e) {
cpf:failure( $cpf:document-uri, $cpf:transition, $e, () )
}
else ( )
};
local:scheduleTask()

Related

mongodb-rust-driver perform poorly on find and get large amount of data compare to go-driver

I have a database consisting of 85.4k documents with an average size of 4 KB.
I wrote simple Go code to find and fetch over 70k documents from the database using mongodb-go-driver:
package main
import (
"context"
"log"
"time"
"go.mongodb.org/mongo-driver/mongo"
"go.mongodb.org/mongo-driver/mongo/options"
)
// main connects to the local MongoDB instance, queries sampleDB.sampleCollect
// for every document whose "deleted" field is false, and logs the number of
// documents returned together with the time spent draining the cursor.
//
// The timer is started after Find returns, so it measures fetching and
// decoding the results (Cursor.All), not connection setup or query dispatch.
func main() {
	ctx := context.TODO()

	localC, err := mongo.Connect(ctx, options.Client().ApplyURI("mongodb://127.0.0.1:27017/?gssapiServiceName=mongodb"))
	if err != nil {
		log.Fatal(err)
	}

	collect := localC.Database("sampleDB").Collection("sampleCollect")

	// A plain map is used as the filter document so the program does not
	// depend on a JSON type that is not declared in this file.
	localCursor, err := collect.Find(ctx, map[string]interface{}{
		"deleted": false,
	})
	if err != nil {
		// Without this check a failed Find would leave a nil cursor that is
		// dereferenced below.
		log.Fatal(err)
	}

	log.Println("start")
	start := time.Now()

	var result []map[string]interface{}
	if err := localCursor.All(ctx, &result); err != nil {
		log.Fatal(err)
	}

	log.Println(len(result))
	log.Println("done")
	log.Println(time.Since(start))
}
Which finishes in around 20 seconds:
2021/03/21 01:36:43 start
2021/03/21 01:36:56 70922
2021/03/21 01:36:56 done
2021/03/21 01:36:56 20.0242869s
After that, I try to implement the similar thing in rust using mongodb-rust-driver
use mongodb::{
bson::{doc, Document},
error::Error,
options::FindOptions,
Client,
};
use std::time::Instant;
use tokio::{self, stream::StreamExt};
#[tokio::main]
async fn main() {
let client = Client::with_uri_str("mongodb://localhost:27017/")
.await
.unwrap();
let db = client.database("sampleDB");
let coll = db.collection("sampleCollect");
let find_options = FindOptions::builder().build();
let cursor = coll
.find(doc! {"deleted": false}, find_options)
.await
.unwrap();
let start = Instant::now();
println!("start");
let results: Vec<Result<Document, Error>> = cursor.collect().await;
let es = start.elapsed();
println!("{}", results.iter().len());
println!("{:?}", es);
}
But it took almost 1 minute to complete the same task on a release build:
$ cargo run --release
Finished release [optimized] target(s) in 0.43s
Running `target\release\rust-mongo.exe`
start
70922
51.1356069s
May I know whether this performance is considered normal for Rust in this case, or whether I made some mistake in my Rust code that could be improved?
EDIT
As comment suggested, here is the Example document
The discrepancy here was due to some known bottlenecks in the Rust driver that have since been addressed in the latest beta release (2.0.0-beta.3); so, upgrading your mongodb dependency to use that version should solve the issue.
Re-running your examples with 10k copies of the provided sample document, I now see the Rust one taking ~3.75s and the Go one ~5.75s on my machine.

airflow TriggerDagRunOperator how to change the execution date

I noticed that for scheduled task the execution date is set in the past according to
Airflow was developed as a solution for ETL needs. In the ETL world,
you typically summarize data. So, if I want to summarize data for
2016-02-19, I would do it at 2016-02-20 midnight GMT, which would be
right after all data for 2016-02-19 becomes available.
however, when a dag triggers another dag the execution time is set to now().
Is there a way to have the triggered dags with the same execution time of triggering dag? Of course, I can rewrite the template and use yesterday_ds, however, this is a tricky solution.
The following class expands on TriggerDagRunOperator to allow passing the execution date as a string that then gets converted back into a datetime. It's a bit hacky but it is the only way I found to get the job done.
from datetime import datetime
import logging
from airflow import settings
from airflow.utils.state import State
from airflow.models import DagBag
from airflow.operators.dagrun_operator import TriggerDagRunOperator, DagRunOrder
class MMTTriggerDagRunOperator(TriggerDagRunOperator):
    """
    MMT-patched for passing explicit execution date
    (otherwise it's hard to hook the datetime.now() date).
    Use when you want to explicitly set the execution date on the target DAG
    from the controller DAG.

    Adapted from Paul Elliot's solution on airflow-dev mailing list archives:
    http://mail-archives.apache.org/mod_mbox/airflow-dev/201711.mbox/%3cCAJuWvXgLfipPmMhkbf63puPGfi_ezj8vHYWoSHpBXysXhF_oZQ@mail.gmail.com%3e

    Parameters
    ------------------
    execution_date: str
        the custom execution date (jinja'd); after rendering it must match
        the format ``%Y-%m-%d %H:%M:%S``

    Usage Example:
    -------------------
    my_dag_trigger_operator = MMTTriggerDagRunOperator(
        execution_date="{{execution_date}}",
        task_id='my_dag_trigger_operator',
        trigger_dag_id='my_target_dag_id',
        python_callable=lambda context, dro: dro if random.getrandbits(1) else None,
        params={},
        dag=my_controller_dag
    )
    """
    # Listing the attribute here is what makes the string templated.
    template_fields = ('execution_date',)

    def __init__(
        self, trigger_dag_id, python_callable, execution_date,
        *args, **kwargs
    ):
        """
        :param trigger_dag_id: id of the DAG to trigger
        :param python_callable: called as ``python_callable(context, dro)``;
            a falsy return value skips the trigger
        :param execution_date: execution date for the triggered run, as a
            (templated) string
        """
        self.execution_date = execution_date
        super(MMTTriggerDagRunOperator, self).__init__(
            trigger_dag_id=trigger_dag_id, python_callable=python_callable,
            *args, **kwargs
        )

    def execute(self, context):
        """
        Create a running DagRun of ``trigger_dag_id`` at the configured
        execution date, if ``python_callable`` returns a truthy DagRunOrder.

        :raises ValueError: if the execution date string does not match
            ``%Y-%m-%d %H:%M:%S``
        """
        run_id_dt = datetime.strptime(self.execution_date, '%Y-%m-%d %H:%M:%S')
        dro = DagRunOrder(run_id='trig__' + run_id_dt.isoformat())
        dro = self.python_callable(context, dro)
        if not dro:
            logging.info("Criteria not met, moving on")
            return

        session = settings.Session()
        try:
            dbag = DagBag(settings.DAGS_FOLDER)
            trigger_dag = dbag.get_dag(self.trigger_dag_id)
            dr = trigger_dag.create_dagrun(
                run_id=dro.run_id,
                state=State.RUNNING,
                # Pass the parsed datetime, not the raw template string.
                execution_date=run_id_dt,
                conf=dro.payload,
                external_trigger=True)
            logging.info("Creating DagRun {}".format(dr))
            session.add(dr)
            session.commit()
        finally:
            # Release the session even when DAG lookup or creation fails.
            session.close()
There is an issue you may run into when using this and not setting execution_date=now(): your operator will throw a mysql error if you try to start a dag with an identical execution_date twice. This is because the execution_date and dag_id are used to create the row index and rows with identical indexes cannot be inserted.
I can't think of a reason you would ever want to run two identical dags with the same execution_date in production anyway, but it is something I ran into while testing and you should not be alarmed by it. Simply clear the old job or use a different datetime.
The TriggerDagRunOperator now has an execution_date parameter to set the execution date of the triggered run.
Unfortunately the parameter is not in the template fields.
If it will be added to template fields (or if you override the operator and change the template_fields value) it will be possible to use it like this:
my_trigger_task= TriggerDagRunOperator(task_id='my_trigger_task',
trigger_dag_id="triggered_dag_id",
python_callable=conditionally_trigger,
execution_date= '{{execution_date}}',
dag=dag)
It has not been released yet but you can see the sources here:
https://github.com/apache/incubator-airflow/blob/master/airflow/operators/dagrun_operator.py
The commit that did the change was:
https://github.com/apache/incubator-airflow/commit/089c996fbd9ecb0014dbefedff232e8699ce6283#diff-41f9029188bd5e500dec9804fed26fb4
I improved the MMTTriggerDagRunOperator a bit. The function checks whether the dag_run already exists and, if it is found, restarts the DAG using Airflow's clear function. This allows us to create a dependency between DAGs, because being able to pass the execution date on to the triggered DAG opens a whole universe of amazing possibilities. I wonder why this is not the default behavior in Airflow.
def execute(self, context):
    """
    Trigger ``self.trigger_dag_id`` at the configured execution date.

    If no DagRun exists for that date, a new running DagRun is created.
    Otherwise the existing run is cleared for exactly that date so that it
    is executed again. Nothing happens when ``python_callable`` returns a
    falsy value.

    :raises ValueError: if ``self.execution_date`` does not match
        ``%Y-%m-%d %H:%M:%S``
    """
    run_id_dt = datetime.strptime(self.execution_date, '%Y-%m-%d %H:%M:%S')
    dro = DagRunOrder(run_id='trig__' + run_id_dt.isoformat())
    dro = self.python_callable(context, dro)
    if not dro:
        logging.info("Criteria not met, moving on")
        return

    session = settings.Session()
    try:
        dbag = DagBag(settings.DAGS_FOLDER)
        trigger_dag = dbag.get_dag(self.trigger_dag_id)
        # Use the parsed datetime for every lookup, not the raw string.
        if not trigger_dag.get_dagrun(run_id_dt):
            dr = trigger_dag.create_dagrun(
                run_id=dro.run_id,
                state=State.RUNNING,
                execution_date=run_id_dt,
                conf=dro.payload,
                external_trigger=True
            )
            logging.info("Creating DagRun {}".format(dr))
            session.add(dr)
            session.commit()
        else:
            # start_date == end_date limits the clear to this single run.
            trigger_dag.clear(
                start_date=run_id_dt,
                end_date=run_id_dt,
                only_failed=False,
                only_running=False,
                confirm_prompt=False,
                reset_dag_runs=True,
                include_subdags=False,
                dry_run=False
            )
            logging.info("Cleared DagRun {}".format(trigger_dag))
    finally:
        # Release the session even when an exception is raised above.
        session.close()
There is a function available in the experimental API section of airflow that allows you to trigger a dag with a specific execution date. https://github.com/apache/incubator-airflow/blob/master/airflow/api/common/experimental/trigger_dag.py
You can call this function as a part of PythonOperator and achieve the objective.
So it will look like
from airflow.api.common.experimental.trigger_dag import trigger_dag
trigger_operator=PythonOperator(task_id='YOUR_TASK_ID',
python_callable=trigger_dag,
op_args=['dag_id'],
op_kwargs={'execution_date': datetime.now()})

cannot find document just created in a trigger

I have the following issue.
We use the OBI framework of MarkLogic
I use a trigger in the database to monitor incoming sensor data.
As soon as a new OBI source is loaded I check some stuff.
If I find something I create an "Alert" object.
Then I would like to use that object to generate a "payload" message to send out to mobile devices...
Now the trigger works.
In the trigger I use an xdmp:eval with a different transaction to make sure I can use the document created in the same trigger code...
BUT if I search for the newly created object it cannot be found...
Can I create a document and use it in the same trigger code?
I am afraid it is hard to create a minimal working example here but this is a try:
Relevant trigger code:
(: fire trigger ALWAYS :)
let $_ := xdmp:log(fn:concat('TP-SENSORTRIGGER-ACTION : Source ', $trgr:uri, ' triggers base rule...'))
(: create alert object plus link to source :)
let $object-id := xdmp:eval(concat('
xquery version "1.0-ml";
import module namespace scl = "http://example.com/sccs/lib" at "/lib/sccss-lib.xqy";
declare variable $source-id external;
let $object-id := scl:create-alert-object($source-id)
let $_ := xdmp:log("***** test *****")
return $object-id
'), (xs:QName('source-id'), $source-id),
<options xmlns="xdmp:eval"><isolation>different-transaction</isolation></options>)
let $_ := xdmp:log($object-id)
(: create payload from alert object :)
(:let $payload := scl:create-payload-from-alert-object($object-id)
:)
let $object := obj:find-object($object-id)
let $_ := xdmp:log($object)
The function to create the OBI object is in a lib. I can share if needed.
I have added log lines "test" and "test B" to make sure I use the proper triggers, and to make sure they are rebuilt on deploy in app-specific.rb.
Then from the logs it is clear the object cannot be found:
2015-10-20 15:33:02.860 Info: example-app: ******** Ingest transform Started ***************
2015-10-20 15:33:04.196 Info: TaskServer: TP-SENSORTRIGGER /marklogic.solutions.obi/source/81a3591a-a885-4f85-a781-b066e706ff41.xml was created, start trigger action...
2015-10-20 15:33:04.291 Info: TaskServer: TP-SENSORTRIGGER-ACTION : Source /marklogic.solutions.obi/source/81a3591a-a885-4f85-a781-b066e706ff41.xml triggers base rule...
2015-10-20 15:33:07.267 Info: TaskServer: ***** test B *****
2015-10-20 15:33:07.268 Info: TaskServer: ***** test *****
2015-10-20 15:33:07.273 Info: TaskServer: 4d1fd4e4-2911-40b0-848c-ccf8eaa39229
2015-10-20 15:33:07.277 Info: TaskServer:
After the trigger has run I can of course find the object with id 4d1fd4e4-2911-40b0-848c-ccf8eaa39229 from the QC.
So this has to do with the transaction model of MarkLogic I guess.
So again the question: Why can't I find the object I just created in an xdmp:eval?
SOLUTION
It turned out you have to wrap ALL functions downstream in a xdmp:eval() for this to work.
(: fire trigger ALWAYS :)
let $_ := xdmp:log(fn:concat('TP-SENSORTRIGGER-ACTION : Source ', $trgr:uri, ' triggers base rule...'))
(: create alert object plus link to source :)
let $object-id := xdmp:eval(concat('
xquery version "1.0-ml";
import module namespace scl = "http://example.com/sccs/lib" at "/lib/example-lib.xqy";
declare variable $source-id external;
declare variable $actions external;
declare variable $lastseen external;
let $object-id := scl:create-alert-object($source-id,$actions,$lastseen)
return $object-id
'), (xs:QName('source-id'), $source-id,xs:QName('actions'), $actions,xs:QName('lastseen'), $lastseen),
<options xmlns="xdmp:eval"><isolation>different-transaction</isolation></options>)
let $_ := xdmp:log($object-id)
(: create payload from alert object :)
(:let $payload := scl:create-payload-from-alert-object($object-id)
:)
let $payload := xdmp:eval(concat('
xquery version "1.0-ml";
import module namespace scl = "http://example.com/sccs/lib" at "/lib/sccss-lib.xqy";
declare variable $object-id external;
let $payload := scl:create-payload-from-alert-object($object-id)
return $payload
'), (xs:QName('object-id'), $object-id),
<options xmlns="xdmp:eval"><isolation>different-transaction</isolation></options>)
let $_ := xdmp:log("************* PAYLOAD ****************")
let $_ := xdmp:log($payload)
(: send alert to middletier :)
let $result := xdmp:eval(concat('
xquery version "1.0-ml";
import module namespace scl = "http://example.com/sccs/lib" at "/lib/sccss-lib.xqy";
declare variable $payload external;
let $res := scl:send-alert-notification($payload)
return $res
'), (xs:QName('payload'), $payload),
<options xmlns="xdmp:eval"><isolation>different-transaction</isolation></options>)
let $_ := xdmp:log("************* RESULT ****************")
let $_ := xdmp:log($result)
This is not clear from the documentation of the xdmp:eval() function :
" When set to different-transaction, the statement is evaluated in a
separate transaction from the one in which it is called, making those
updates available to subsequent expressions in the calling statement
(assuming the calling statement is an update statement; if the calling
statement is not an update, then subsequent expressions will see the
version of the database at the system timestamp when the calling
statement begins its evaluation)."

Trigger works but test doesn't cover 75% of the code

I have a trigger which works in the sandbox. The workflow checks the field in the campaign level and compares it with the custom setting. If it matches, then it returns the target to the DS Multiplier field. The trigger looks as follows
// Before insert/update: for every Campaign with a non-blank
// Apex_Calculator__c, looks up the DSTargets__c custom setting of that name
// and copies its Target__c value into DS_Target_Multiplier__c.
trigger PopulateTarget on Campaign (before insert, before update)
{
    for (Campaign campaign : Trigger.new)
    {
        // No lookup key, nothing to populate.
        if (String.isBlank(campaign.Apex_Calculator__c))
        {
            continue;
        }

        DSTargets__c targetInstance = DSTargets__c.getInstance(campaign.Apex_Calculator__c);

        // getInstance can return null when no setting matches the key;
        // leave the field untouched instead of failing the whole DML
        // operation with a NullPointerException.
        if (targetInstance != null)
        {
            campaign.DS_Target_Multiplier__c = targetInstance.Target__c;
        }
    }
}
However, I had problems to write a proper test to this and asked for the help on the internet. I received the test
// Test for the PopulateTarget trigger: inserts a DSTargets__c custom setting
// and a Campaign, then checks that the Campaign's DS_Target_Multiplier__c
// equals the setting's Target__c.
@isTest
private class testPopulateTarget{
static testMethod void testMethod1(){
// Load the Custom Settings
// NOTE(review): apexCalculatorUserId is not declared anywhere in this class,
// so this line does not compile until it is defined or replaced.
DSTargets__c testSetting = new DSTargets__c(Name='Africa - 10 Weeks; CW 10',Target__c='0.1538', SetupOwnerId = apexCalculatorUserId);
insert testSetting;
// Create Campaign. Since it would execute trigger, put it in start and stoptests
Test.startTest();
Campaign testCamp = new Campaign();
// populate all reqd. fields.
// NOTE(review): Apex_Calculator__c is never set here; unless it is populated
// automatically (e.g. a formula field), the trigger's if-body is not exercised.
testCamp.Name = 'test DS campaign';
testCamp.RecordTypeId = '012200000001b3v';
testCamp.Started_Campaign_weeks_before_Event__c = '12 Weeks';
testCamp.ParentId= '701g0000000EZRk';
insert testCamp;
Test.stopTest();
// Re-query so the values written by the trigger are visible.
testCamp = [Select ID,Apex_Calculator__c,DS_Target_Multiplier__c from Campaign where Id = :testCamp.Id];
// Expected value first, actual second, so a failure message reads correctly.
system.assertEquals(testSetting.Target__c,testCamp.DS_Target_Multiplier__c);// assert that target is populated right
}
}
Such test returns the error "Compile Error: Variable does not exist: apexCalculatorUserId at line 6 column 122". If I remove that ApexCalculator part System.assertEquals then the test passes. However it covers 4/6 part of the code (which is 66%)
Could anyone help me amend the code so that the coverage reaches 75%?
Yes, apexCalculatorUserId has not been defined. The code you were given appears to be incomplete. You'll need to look at the constructor DSTargets__c and see what kind of ID it is expecting there.
At a guess, you could try UserInfo.getUserId() to get the ID of the current user, but that may not be the ID that's expected in the constructor. It would be worth trying it to see if the test coverage improves.
1) Replace apexCalculatorUserId with UserInfo.getUserId()
2) I'm not sure what kind of field Apex_Calculator__c is on Campaign. If it's not a formula field, you will want to insert a new line before "insert testCamp". Something like:
testCamp.Apex_Calculator__c = UserInfo.getUserId();

If Condition followed by a for loop not executing in Xquery

In one of the SOAP responses, I was trying to use the following Xquery code to check a condition followed by for loop. I was trying to get a count of some element and then use the if condition and based on that if condition, it should execute the for loop. However there is an exception that shows up .
Here is my Xquery bit in the SOAP UI.
declare variable $datesList := ("2013-01-01-00.30.00","2013-01-01-01.00.00","2013-01-01-01.30.00","2013-01-01-02.00.00","2013-01-01-02.30.00","2013-01-01-03.00.00","2013-01-01-03.30.00","2013-01-01-04.00.00");
<res>
{
let $mcId1 :=count(//ZZQAD2UsageTransactionSVC/usagePeriods/usagePeriodsList/SQs/SQsList[1]/mL)
let $mcId2 :=count(//ZZQAD2UsageTransactionSVC/usagePeriods/usagePeriodsList/SQs/SQsList[2]/mL)
if($mcId1=8)
{
for $mlList in //ZZQAD2UsageTransactionSVC/usagePeriods/usagePeriodsList/SQs/SQsList[1]/intervals/mL
return(if($mcId1 > $mcId2)
then <text>true</text>
else <text>false</text>)
}
}
Here is the exception that appears during run time.
RuntimeException:java.lang.reflect.InvocationTargetException
So I want to seek advice from the seniors and gurus, if the piece of Xquery code is correct?
Thanks much in advance.
There are multiple syntax errors in your query:
let clauses have to be part of a FLWOR expression, which always ends with a return clause.
if cannot be used without then and else and does not use curly braces.
The opening tag <res> needs a matching closing tag </res> at the end of the query.
The corrected query looks like this:
(: List of timestamp strings; declared here but not referenced by the query body below. :)
declare variable $datesList := (
"2013-01-01-00.30.00", "2013-01-01-01.00.00",
"2013-01-01-01.30.00", "2013-01-01-02.00.00",
"2013-01-01-02.30.00", "2013-01-01-03.00.00",
"2013-01-01-03.30.00", "2013-01-01-04.00.00"
);
(: Builds a <res> element holding one <text> flag per interval entry, or nothing at all
   when the first SQsList does not have exactly 8 mL children. :)
<res>{
(: Number of mL elements directly under the first and the second SQsList respectively. :)
let $mcId1 := count(//ZZQAD2UsageTransactionSVC/usagePeriods/usagePeriodsList/SQs/SQsList[1]/mL)
let $mcId2 := count(//ZZQAD2UsageTransactionSVC/usagePeriods/usagePeriodsList/SQs/SQsList[2]/mL)
(: if always needs both then and else; the empty sequence is the "do nothing" branch. :)
return if($mcId1 = 8) then (
(: Iterates intervals/mL under the first SQsList, a different path from the one counted above.
   $mlList is not used in the body, so every item gets the same true/false value. :)
for $mlList in //ZZQAD2UsageTransactionSVC/usagePeriods/usagePeriodsList/SQs/SQsList[1]/intervals/mL
return if($mcId1 > $mcId2)
then <text>true</text>
else <text>false</text>
) else ()
}</res>