Meteor, MongoDB - db.collection.find() for OR condition

In MongoDB, I have the following JSON documents in a collection named "Jobs":
{
"userId": "testUser1",
"default": "true",
"someData": "data"
},
{
"userId": "testUser1",
"default": "false",
"someData": "data"
},
{
"userId": "testUser2",
"default": "true",
"someData": "data"
},
{
"userId": "testUser2",
"default": "false",
"someData": "data"
}
In Meteor, I am trying to select based on two conditions:
- select documents for the given userId OR where default is "true"
I have the following code in Meteor:
Jobs.find({$or:[{userid:"testUser1"}, {default:"true"}]});
But it is selecting only these two documents:
{
"userId": "testUser1",
"default": "true",
"someData": "data"
},
{
"userId": "testUser1",
"default": "false",
"someData": "data"
}
and it is NOT returning the document below in the response:
{
"userId": "testUser2",
"default": "true",
"someData": "data"
}
I tried $where as well, but that did not work either.
How do I retrieve the right documents from MongoDB?

Try without $or
Jobs.find({userId: "testUser2", "default": "true"});

Just to be clear, you're trying to get all three of the records you mention, right? If so, I think your issue is that the 'true' values are strings, not bools, and I'm guessing that you're searching on a bool. Try this:
db.Jobs.find()
{"userId" : "testUser1", "default" : "true", "someData" : "data" }
{"userId" : "testUser1", "default" : "false", "someData" : "data" }
{"userId" : "testUser2", "default" : "true", "someData" : "data" }
{"userId" : "testUser2", "default" : "false", "someData" : "data" }
db.Jobs.find({ $or: [{ userId: 'testUser1' }, { default : 'true' } ] })
{"userId" : "testUser1", "default" : "true", "someData" : "data" }
{"userId" : "testUser1", "default" : "false", "someData" : "data" }
{"userId" : "testUser2", "default" : "true", "someData" : "data" }
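For reference, a sketch of the corrected query in Meteor (note that MongoDB field names are case-sensitive, so it must be userId rather than the userid used in the original query, and "true" has to be compared as a string because that is how it is stored):
Jobs.find({ $or: [ { userId: "testUser1" }, { default: "true" } ] }).fetch();
This should return all three expected documents as an array.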

Related

Kafka Connect SftpCSVSourceConnector schema configuration

I'm trying to set up an SftpCsvSourceConnector in my local environment and I'm having some trouble setting a schema on the connector. This is what I'm trying to do:
curl -i -X PUT -H "Accept:application/json" \
-H "Content-Type:application/json" http://localhost:8083/connectors/nc-csv-02/config \
-d '{
"tasks.max" : "1",
"connector.class" : "io.confluent.connect.sftp.SftpCsvSourceConnector",
"kafka.topic": "sftp-csv-00",
"cleanup.policy":"NONE",
"behavior.on.error":"IGNORE",
"key.converter": "io.confluent.connect.avro.AvroConverter",
"value.converter": "io.confluent.connect.avro.AvroConverter",
"input.path" : "/",
"csv.separator.char" : 59,
"finished.path" : "/finished",
"error.path" : "/error",
"schema.generation.key.fields" : "msisdn",
"input.file.pattern" : ".*\\.dat",
"schema.generation.enabled" : "false",
"csv.first.row.as.header" : "true",
"key.schema":"{\"fields\":[{\"default\":null,\"name\":\"msisdn\",\"type\":[\"null\",\"string\"]}],\"name\":\"NCKeySchema\",\"type\":\"record\"}",
"value.schema":"{\"name\":\"NCPortabilityMovementEvent\",\"type\":\"record\",\"fields\":[{\"default\":null,\"name\":\"action\",\"type\":[\"null\",\"string\"]},{\"default\":null,\"name\":\"msisdn\",\"type\":[\"null\",\"string\"]},{\"default\":null,\"name\":\"previousNRN\",\"type\":[\"null\",\"string\"]},{\"default\":null,\"name\":\"newNRN\",\"type\":[\"null\",\"string\"]},{\"default\":null,\"name\":\"effectiveDate\",\"type\":[\"null\",\"string\"]},{\"default\":null,\"name\":\"referenceID\",\"type\":[\"null\",\"string\"]}]}",
"sftp.username":"tester",
"sftp.password":"password",
"sftp.host":"192.168.1.2",
"sftp.port":"22"
}'
The exception I see in the worker task is
org.apache.kafka.common.config.ConfigException: Invalid value com.fasterxml.jackson.databind.exc.UnrecognizedPropertyException: Unrecognized field "fields" (class com.github.jcustenborder.kafka.connect.utils.jackson.SchemaSerializationModule$Storage), not marked as ignorable (10 known properties: "defaultValue", "valueSchema", "doc", "type", "name", "keySchema", "version", "parameters", "isOptional", "fieldSchemas"])
at [Source: (String)"{"fields":[{"default":null,"name":"msisdn","type":["null","string"]}],"name":"NCKeySchema","type":"record"}"; line: 1, column: 12] (through reference chain: com.github.jcustenborder.kafka.connect.utils.jackson.SchemaSerializationModule$Storage["fields"]) for configuration Could not read schema from 'key.schema'
at io.confluent.connect.sftp.source.SftpSourceConnectorConfig.readSchema(SftpSourceConnectorConfig.java:334)
at io.confluent.connect.sftp.source.SftpSourceConnectorConfig.<init>(SftpSourceConnectorConfig.java:117)
at io.confluent.connect.sftp.source.SftpCsvSourceConnectorConfig.<init>(SftpCsvSourceConnectorConfig.java:156)
at io.confluent.connect.sftp.SftpCsvSourceConnector.start(SftpCsvSourceConnector.java:44)
at org.apache.kafka.connect.runtime.WorkerConnector.doStart(WorkerConnector.java:185)
at org.apache.kafka.connect.runtime.WorkerConnector.start(WorkerConnector.java:210)
at org.apache.kafka.connect.runtime.WorkerConnector.doTransitionTo(WorkerConnector.java:349)
at org.apache.kafka.connect.runtime.WorkerConnector.doTransitionTo(WorkerConnector.java:332)
at org.apache.kafka.connect.runtime.WorkerConnector.doRun(WorkerConnector.java:141)
at org.apache.kafka.connect.runtime.WorkerConnector.run(WorkerConnector.java:118)
at java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515)
at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at java.base/java.lang.Thread.run(Thread.java:829)
The schemas I'm trying to use for key and value are
{
"fields": [
{
"default": null,
"name": "msisdn",
"type": [
"null",
"string"
]
}
],
"name": "NCKeySchema",
"type": "record"
}
and
{
"name" : "NCPortabilityMovementEvent",
"type" : "record",
"fields" : [
{
"default" : null,
"name" : "action",
"type" : [
"null",
"string"
]
},
{
"default" : null,
"name" : "msisdn",
"type" : [
"null",
"string"
]
},
{
"default" : null,
"name" : "previousNRN",
"type" : [
"null",
"string"
]
},
{
"default" : null,
"name" : "newNRN",
"type" : [
"null",
"string"
]
},
{
"default" : null,
"name" : "effectiveDate",
"type" : [
"null",
"string"
]
},
{
"default" : null,
"name" : "referenceID",
"type" : [
"null",
"string"
]
}
]
}
What am I doing wrong here?
When I tried this with schema.generation.enabled=true and removed key.schema and value.schema, the connector worked just fine.
You're providing Avro schemas, which is not what this connector expects. You need to define Kafka Connect schemas, which use type=STRUCT with fieldSchemas. The format itself is not well documented, but there are examples here: https://docs.confluent.io/kafka-connect-sftp/current/source-connector/csv_source_connector.html#sftp-connector-csv-with-schema-example
You can find the source code of the schema JSON deserializer here: https://github.com/jcustenborder/connect-utils/tree/master/connect-utils-jackson/src/main/java/com/github/jcustenborder/kafka/connect/utils/jackson
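For illustration, here is a sketch of how the key schema from the question might look in that Connect schema JSON format (the property names follow the ones listed in the exception message: name, type, isOptional, fieldSchemas); treat the exact values as an assumption to verify against the linked example:
{
  "name": "NCKeySchema",
  "type": "STRUCT",
  "isOptional": false,
  "fieldSchemas": {
    "msisdn": {
      "type": "STRING",
      "isOptional": true
    }
  }
}
In the connector config this JSON would then be escaped into the key.schema string, the same way the Avro schema was escaped above.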

How to send data to Druid using the Tranquility core API?

I have set up Druid and was able to run the tutorial at Tutorial: Loading a file. I was also able to execute native JSON queries and get the results as described at http://druid.io/docs/latest/tutorials/tutorial-query.html. The Druid setup is working fine.
I now want to ingest additional data from a Java program into this datasource. Is it possible to send data into Druid using Tranquility from a Java program for a datasource created using batch load?
I tried the example program at : https://github.com/druid-io/tranquility/blob/master/core/src/test/java/com/metamx/tranquility/example/JavaExample.java
But this program just keeps running and doesn't show any output. How can Druid be set up to accept data using the Tranquility core APIs?
Following are the ingestion spec and the Tranquility config file:
wikipedia-index.json
{
"type" : "index",
"spec" : {
"dataSchema" : {
"dataSource" : "wikipedia",
"parser" : {
"type" : "string",
"parseSpec" : {
"format" : "json",
"dimensionsSpec" : {
"dimensions" : [
"channel",
"cityName",
"comment",
"countryIsoCode",
"countryName",
"isAnonymous",
"isMinor",
"isNew",
"isRobot",
"isUnpatrolled",
"metroCode",
"namespace",
"page",
"regionIsoCode",
"regionName",
"user",
{ "name": "added", "type": "long" },
{ "name": "deleted", "type": "long" },
{ "name": "delta", "type": "long" }
]
},
"timestampSpec": {
"column": "time",
"format": "iso"
}
}
},
"metricsSpec" : [],
"granularitySpec" : {
"type" : "uniform",
"segmentGranularity" : "day",
"queryGranularity" : "none",
"intervals" : ["2015-09-12/2015-09-13"],
"rollup" : false
}
},
"ioConfig" : {
"type" : "index",
"firehose" : {
"type" : "local",
"baseDir" : "quickstart/",
"filter" : "wikiticker-2015-09-12-sampled.json.gz"
},
"appendToExisting" : false
},
"tuningConfig" : {
"type" : "index",
"targetPartitionSize" : 5000000,
"maxRowsInMemory" : 25000,
"forceExtendableShardSpecs" : true
}
}
}
example.json (tranquility config):
{
"dataSources" : [
{
"spec" : {
"dataSchema" : {
"dataSource" : "wikipedia",
"metricsSpec" : [
{ "type" : "count", "name" : "count" }
],
"granularitySpec" : {
"segmentGranularity" : "hour",
"queryGranularity" : "none",
"type" : "uniform"
},
"parser" : {
"type" : "string",
"parseSpec" : {
"format" : "json",
"timestampSpec" : { "column": "time", "format": "iso" },
"dimensionsSpec" : {
"dimensions" : ["channel",
"cityName",
"comment",
"countryIsoCode",
"countryName",
"isAnonymous",
"isMinor",
"isNew",
"isRobot",
"isUnpatrolled",
"metroCode",
"namespace",
"page",
"regionIsoCode",
"regionName",
"user",
{ "name": "added", "type": "long" },
{ "name": "deleted", "type": "long" },
{ "name": "delta", "type": "long" }]
}
}
}
},
"tuningConfig" : {
"type" : "realtime",
"windowPeriod" : "PT10M",
"intermediatePersistPeriod" : "PT10M",
"maxRowsInMemory" : "100000"
}
},
"properties" : {
"task.partitions" : "1",
"task.replicants" : "1"
}
}
],
"properties" : {
"zookeeper.connect" : "localhost"
}
}
I did not find any example of setting up a datasource in Druid that continuously accepts data from a Java program. I don't want to use Kafka. Any pointers on this would be greatly appreciated.
You need to create the data files with the additional data first and then run the ingestion task with the new fields. You can't edit an existing record in Druid; re-ingesting overwrites it with a new record.
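For example, assuming a default quickstart setup with the Overlord running on localhost:8090, a native batch task such as the wikipedia-index.json above can be submitted along these lines:
curl -X POST -H 'Content-Type: application/json' \
  -d @wikipedia-index.json \
  http://localhost:8090/druid/indexer/v1/task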

Druid count differs when we run the same query on daily and raw data

When I run a query against the ABS datasource in Druid, I get one count, but it differs from the result when the same query is run against the ABS_DAILY datasource, and ABS_DAILY is built from ABS.
{
"queryType" : "groupBy",
"dataSource" : "ABS",
"granularity" : "all",
"intervals" : [ "2018-07-12T00:00:00.000Z/2018-07-13T00:00:00.000Z" ],
"descending" : "false",
"aggregations" : [ {
"type" : "count",
"name" : "COUNT",
"fieldName" : "COUNT"
} ],
"postAggregations" : [ ],
"dimensions" : [ "event_id" ]
}
The JSON below is used to submit the daily job to Druid, which creates segments for ABS_DAILY for a specific time interval:
{
"spec": {
"ioConfig": {
"firehose": {
"dataSource": "ABS",
"interval": "2018-07-12T00:00:00.000Z/2018-07-13T00:00:00.000Z",
"metrics": null,
"dimensions": null,
"type": "ingestSegment"
},
"type": "index"
},
"dataSchema": {
"granularitySpec": {
"queryGranularity": "day",
"intervals": [
"2018-07-12T00:00:00.000Z/2018-07-13T00:00:00.000Z"
],
"segmentGranularity": "day",
"type": "uniform"
},
"dataSource": "ABS_DAILY",
"metricsSpec": [],
"parser": {
"parseSpec": {
"timestampSpec": {
"column": "server_timestamp",
"format": "dd MMMM, yyyy (HH:mm:ss)"
},
"dimensionsSpec": {
"dimensionExclusions": [
"server_timestamp"
],
"dimensions": []
},
"format": "json"
},
"type": "string"
}
}
},
"type": "index"
}
I queried ABS_DAILY with the query below, and it returns a different count than ABS, which it should not.
{
"queryType" : "groupBy",
"dataSource" : "ERS_DAILY",
"granularity" : "all",
"intervals" : [ "2018-07-12T00:00:00.000Z/2018-07-13T00:00:00.000Z" ],
"descending" : "false",
"aggregations" : [ {
"type" : "count",
"name" : "COUNT",
"fieldName" : "COUNT"
} ],
"postAggregations" : [ ],
"dimensions" : [ "event_id" ]
}
You are counting rows of the daily aggregates.
To summarize pre-aggregated counts, you now need to sum the count column (note the aggregation type below):
{
"queryType" : "groupBy",
"dataSource" : "ERS_DAILY",
"granularity" : "all",
"intervals" : [ "2018-07-12T00:00:00.000Z/2018-07-13T00:00:00.000Z" ],
"descending" : "false",
"aggregations" : [ {
"type" : "longSum",
"name" : "COUNT",
"fieldName" : "COUNT"
} ],
"postAggregations" : [ ],
"dimensions" : [ "event_id" ]
}

Kapacitor: how to create a task using a template via the REST API?

I can successfully create templates and tasks using the REST API.
How do I create a task from a template via the REST API?
Which endpoint should I use?
Okay, found out how:
Basically just use the same tasks REST endpoint, do a POST, and pass in the JSON.
In the JSON you can specify the template-id and the vars, like below.
{
"status": "disabled"
,"id": "test_task4"
,"template-id": "generic_mean_alert"
,"vars" : {
"measurement": {"type" : "string", "value" : "cpu" },
"where_filter": {"type": "lambda", "value": "\"cpu\" == 'cpu-total'"},
"groups": {"type": "list", "value": [{"type":"string", "value":"host"},{"type":"string", "value":"dc"}]},
"field": {"type" : "string", "value" : "usage_idle" },
"warn": {"type" : "lambda", "value" : "\"mean\" < 30.0" },
"crit": {"type" : "lambda", "value" : "\"mean\" < 10.0" },
"window": {"type" : "duration", "value" : "1m" },
"slack_channel": {"type" : "string", "value" : "#alerts_testing" }
}
,"dbrps": [ { "db": "test","rp": "autogen" } ]
,"type": "stream"
}
http://yoururl/kapacitor/v1/tasks
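For example, assuming the JSON above is saved as task.json, the call could look like this:
curl -XPOST http://yoururl/kapacitor/v1/tasks \
  -H 'Content-Type: application/json' \
  -d @task.json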

Mapping in create index in Elasticsearch through MongoDB river is not taking effect

I am trying to index MongoDB in Elasticsearch using mongodb-river with the following command, but the document mapping is not taking effect. It is still using the default (standard) analyzer for the text field.
Mongodb-river
The documentation specifies how to create the index, but there is no documentation on how to provide a custom mapping. This is what I tried. Is there any other documentation on how to specify custom analyzers etc. when using mongodb-river?
curl -XPUT "localhost:9200/_river/autocompleteindex/_meta" -d '
{
"type": "mongodb",
"mongodb": {
"host": "rahulg-dc",
"port": "27017",
"db": "qna",
"collection": "autocomplete_questions"
},
"index": {
"name": "autocompleteindex",
"type": "autocomplete_questions",
"analysis" : {
"analyzer" : {
"str_search_analyzer" : {
"tokenizer" : "keyword",
"filter" : ["lowercase"]
},
"str_index_analyzer" : {
"tokenizer" : "keyword",
"filter" : ["lowercase", "ngram"]
}
},
"filter" : {
"ngram" : {
"type" : "ngram",
"min_gram" : 2,
"max_gram" : 20
}
}
}
},
"autocompleteindex": {
"_boost" : {
"name" : "po",
"null_value" : 1.0
},
"properties": {
"po": {
"type": "double"
},
"text": {
"type": "string",
"boost": 3.0,
"search_analyzer" : "str_search_analyzer",
"index_analyzer" : "str_index_analyzer"
}
}
}
}'
The query returns proper results if I search by full words but does not match on any substring. Also, the boost factor is not showing its effect.
What am I doing wrong?
You first have to create your index with your index settings (analyzers):
"analysis" : {
"analyzer" : {
"str_search_analyzer" : {
"tokenizer" : "keyword",
"filter" : ["lowercase"]
},
"str_index_analyzer" : {
"tokenizer" : "keyword",
"filter" : ["lowercase", "ngram"]
}
},
"filter" : {
"ngram" : {
"type" : "ngram",
"min_gram" : 2,
"max_gram" : 20
}
}
}
Then you can define a mapping for your type:
"autocomplete_questions": {
"_boost" : {
"name" : "po",
"null_value" : 1.0
},
"properties": {
"po": {
"type": "double"
},
"text": {
"type": "string",
"boost": 3.0,
"search_analyzer" : "str_search_analyzer",
"index_analyzer" : "str_index_analyzer"
}
}
}
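For reference, a sketch of those first two steps with the Elasticsearch 1.x API (the version mongodb-river targets), using the index name and type from the question, could look like this:
curl -XPUT "localhost:9200/autocompleteindex" -d '{
  "settings": {
    "analysis": {
      "analyzer": {
        "str_search_analyzer": { "tokenizer": "keyword", "filter": ["lowercase"] },
        "str_index_analyzer": { "tokenizer": "keyword", "filter": ["lowercase", "ngram"] }
      },
      "filter": {
        "ngram": { "type": "ngram", "min_gram": 2, "max_gram": 20 }
      }
    }
  }
}'
curl -XPUT "localhost:9200/autocompleteindex/autocomplete_questions/_mapping" -d '{
  "autocomplete_questions": {
    "properties": {
      "po": { "type": "double" },
      "text": {
        "type": "string",
        "boost": 3.0,
        "search_analyzer": "str_search_analyzer",
        "index_analyzer": "str_index_analyzer"
      }
    }
  }
}'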
And only then can you create the river:
curl -XPUT "localhost:9200/_river/autocompleteindex/_meta" -d '
{
"type": "mongodb",
"mongodb": {
"host": "rahulg-dc",
"port": "27017",
"db": "qna",
"collection": "autocomplete_questions"
},
"index": {
"name": "autocompleteindex",
"type": "autocomplete_questions"
}
}'
Does it help?