Querying in mongodb 3.4 - mongodb

I have large XML files which I had to convert to JSON and store in MongoDB. The Python code for the conversion and insertion is:
import pymysql
import re
import json
import xmltodict
from pymongo import MongoClient
# Open the MySQL connection. Keyword arguments are used because recent
# PyMySQL releases no longer accept positional connection parameters.
db = pymysql.connect(host="fffff", user="ddd", password="fgf", database="hnj")
try:
    # Cursor used to run the extraction query.
    cursor = db.cursor()
    try:
        # One row per appid: (appid, response XML decoded as utf8).
        # Adjacent string literals are used instead of a backslash
        # continuation, which would glue "lex.appid" and "group by" together.
        cursor.execute(
            "SELECT jlp.appid, convert(MAX(lex.response) using utf8) "
            "FROM jos_lender_portfolio jlp "
            "INNER JOIN jos_lexnex_data lex ON jlp.appid = lex.appid "
            "group by appid limit 10;"
        )
        # Fetch all rows before the connection is released.
        data = cursor.fetchall()
    finally:
        cursor.close()
finally:
    # `db` is rebound to the MongoDB database further down, so the MySQL
    # connection has to be closed here or it is never closed at all.
    db.close()
# Captures everything from the first "<response>" through the last
# "</response>" (the ".*" is greedy and re.S lets "." span newlines).
a = (r'(?=<response>)(.*)(?<=</response>)')
_RESPONSE_RE = re.compile(a, re.S)


def cleanxml(xml):
    """Return the ``<response>...</response>`` section of *xml*.

    Args:
        xml: Raw response text, possibly with leading/trailing content
            around the ``<response>`` element. May be empty or ``None``.

    Returns:
        The text from the first ``<response>`` to the last ``</response>``
        inclusive, or the placeholder ``"<response>NA</response>"`` when
        *xml* is empty/``None`` or contains no such section.
    """
    if not xml:
        # e.g. a NULL column value coming back from the database
        return "<response>NA</response>"
    match = _RESPONSE_RE.search(xml)
    if match is None:
        return "<response>NA</response>"
    return match.group(1)
data = list(data)
# Connect to the default local MongoDB instance; documents go to lexnex.test.
client = MongoClient()
db = client['lexnex']
collection = db['test']
for row in data:
    # row is (appid, response XML); keep only the <response> section.
    thexml = cleanxml(row[1])
    # xmltodict already returns a mapping that pymongo can store directly,
    # so no json.dumps()/json.loads() round trip is needed.
    document = {"caseid": row[0]}
    document.update(xmltodict.parse(thexml))
    collection.insert_one(document)
Now, the problem: I'm very new to MongoDB and am having trouble querying my database. I have the following JSON document:
"_id":ObjectId("5aeff8537871560bf05d8c25"),
"caseid":44136,
"response":{
"Header":{
"TransactionId":"18092257R1069402",
"Status":"0"
},
"Records":{
"Record":[
{
"Filings":{
"Filing":{
"Type":"INITIAL FILING",
"Date":{
"Day":"23",
"Month":"9",
"Year":"2008"
}
}
},
"FilingJurisdiction":"NY",
"MatchedParty":{
"PartyType":"D",
"Address":{
"City":"BROOKLYN",
"State":"NY",
},
"OriginName":"GOLDLINE"
},
"Secureds":{
"Secured":{
"Addresses":{
"Address":{
"City":"SCHAUMBURG",
"State":"IL"
}
}
}
}
},
{
,
"Filings":{
"Filing":{
"Type":"INITIAL FILING",
"Date":{
"Day":"23",
"Month":"9",
"Year":"2008"
}
}
},
"FilingJurisdiction":"NY",
"MatchedParty":{
"PartyType":"D",
"Address":{
"City":"BROOKLYN",
"State":"NY",
},
"OriginName":"GOLD"
},
"Secureds":{
"Secured":{
"Addresses":{
"Address":{
"City":"SCHAUMBURG",
"State":"IL"
}
}
}
}
}
]
}
}
This is a small portion of a very big document and there are more than a million such documents. Now, the expected result which I want is for every caseid, some part of the Filings and the Secureds. Here's the sample expected output:
"_id":ObjectId("5aeff8537871560bf05d8c25"),
"caseid":44136,
"Filings":{
[
"Filing":{
"Type":"INITIAL FILING",
"Date":{
"Day":"23",
"Month":"9",
"Year":"2008"
}
},
"Secureds":{
"Secured":{
"Addresses":{
"Address":{
"City":"SCHAUMBURG",
"State":"IL"
}
}
}
},
{
"Filing":{
"Type":"INITIAL FILING",
"Date":{
"Day":"23",
"Month":"9",
"Year":"2008"
}
}
},
"Secureds":{
"Secured":{
"Addresses":{
"Address":{
"City":"SCHAUMBURG",
"State":"IL"
}
}
}
}
]
}
There are several caseids and each one has 0 or more filings. I have no clue how to do it. I know the basics like simple queries. But this, I think, requires $unwind and $group together. What I have written so far is nothing but just this:
// Attempt so far: $unwind on `response`, then $group by caseid. The $group stage
// has no accumulators, so each result document contains only _id: {caseid: ...}.
// NOTE(review): in the sample document `response` is an embedded document, not an
// array, and the Record array lives at response.Records.Record -- presumably that
// is the path that needs unwinding; confirm against the real data.
db.test.aggregate([{$unwind:{path: '$response'}},{"$group":{_id:{caseid:"$caseid"}}}])
Please help.

Related

Groovy/Jenkins: how to refactor sh(script:"curl ...") to URL?

My Jenkins pipeline currently successfully invokes Bitbucket REST API by invoking curl in a shell, as in the code below:
// Jenkinsfile
// Demo pipeline: stage "2" reports a build status for a fixed commit to
// Bitbucket through the shared library; the collected log is echoed at the end.
@Library('my-sandbox-libs@dev') my_lib
pipeline {
  agent any
  stages {
    stage( "1" ) { steps { script { echo "hello" } } }
    stage( "2" ) {
      steps {
        script {
          // No `def` here: presumably so that `log` ends up in the script
          // binding and is still visible in the `post` section -- confirm.
          log = new org.log.Log()
          def cred_id = "bitbucket_cred_id"
          def url_base = "https://bitbucket.company.com"
          def commit = "76136485c45df256a62cbc2c3c5f1f3efcc86258"
          // Other values tried: "INPROGRESS", "SUCCESSFUL".
          def status = "FAILED"
          def viz_url = "https://path/to/nowhere"
          try {
            my_lib.notifyBitbucketBuildStatus(cred_id,
                                              url_base,
                                              commit,
                                              status,
                                              "foo",
                                              42,
                                              viz_url,
                                              log)
          } catch (Exception e) {
            // Groovy requires a catch or finally clause after try.
            // NOTE(review): logging instead of failing the build is an
            // assumption about the intent -- adjust if the build should fail.
            log.warn("Bitbucket build status notification threw: ${e}")
          }
        }
      }
    }
    stage( "3" ) { steps { script { echo "world" } } }
  }
  post { always { script { echo log.asJsonString() } } }
}
import groovy.json.JsonOutput
/**
 * Posts a build status for a commit to the Bitbucket build-status REST API
 * by running curl in a shell step.
 *
 * @param cred_id            id of a "secret text" credential holding the bearer token
 * @param url_base           base URL of the Bitbucket server
 * @param commit             commit hash the status is attached to
 * @param build_state        value sent as "state"
 * @param build_info_name    first half of the status key
 * @param build_info_number  second half of the status key
 * @param viz_url            value sent as "url"
 * @param log                logger; warn() is called when curl exits non-zero
 */
def notifyBitbucketBuildStatus(cred_id,
                               url_base,
                               commit,
                               build_state,
                               build_info_name,
                               build_info_number,
                               viz_url,
                               log) {
  def rest_path = "rest/build-status/1.0/commits"
  def dict = [:]
  dict.state = build_state
  dict.key = "${build_info_name}_${build_info_number}"
  dict.url = viz_url
  withCredentials([string(credentialsId: cred_id,
                          variable: 'auth_token')]) {
    // "\$auth_token" is left for the shell to expand from the environment
    // variable that withCredentials exports. Interpolating the secret in
    // Groovy would place the token itself in the command string.
    def cmd = "curl -f -L " +
              "-H \"Authorization: Bearer \$auth_token\" " +
              "-H \"Content-Type:application/json\" " +
              "-X POST ${url_base}/${rest_path}/${commit} " +
              "-d \'${JsonOutput.toJson(dict)}\'"
    // returnStatus: true stops a curl failure from aborting the build;
    // it is only reported through the logger.
    if ( 0 != sh(script: cmd, returnStatus: true) ) {
      log.warn("Failed updating build status with Bitbucket")
    }
  }
}
I would like to refactor function notifyBitbucketBuildStatus() to use a "native" Groovy-language solution, rather than invoking curl in a shell. I read the following on this topic:
https://www.baeldung.com/groovy-web-services
Groovy built-in REST/HTTP client?
...based on which I thought the refactored function would look like this:
// Attempted curl-free variant: builds the same JSON payload and POSTs it with
// java.net.URL. Logs a warning when the response code is anything other than 200.
def notifyBitbucketBuildStatus(cred_id,
url_base,
commit,
build_state,
build_info_name,
build_info_number,
viz_url,
log) {
def rest_path = "rest/build-status/1.0/commits"
// Payload: state, key ("<name>_<number>") and url.
def dict = [:]
dict.state = build_state
dict.key = "${build_info_name}_${build_info_number}"
dict.url = viz_url
// NOTE(review): `req` is the HttpsURLConnectionImpl named in the reported
// NotSerializableException, and it is a live local variable while the
// withCredentials step below runs. Presumably Jenkins tries to serialize the
// pipeline state at that step -- confirm against the Jenkins CPS documentation.
def req = new URL("${url_base}/${rest_path}/${commit}").openConnection()
req.setRequestMethod("POST")
req.setDoOutput(true)
req.setRequestProperty("Content-Type", "application/json")
withCredentials([string(credentialsId: cred_id,
variable: 'auth_token')]) {
req.setRequestProperty("Authorization", "Bearer ${auth_token}")
}
def msg = JsonOutput.toJson(dict)
// The request body is the JSON payload encoded as UTF-8.
req.getOutputStream().write(msg.getBytes("UTF-8"));
if ( 200 != req.getResponseCode() ) {
log.warn("Failed updating build status with Bitbucket")
}
}
...but this generates the exception java.io.NotSerializableException: sun.net.www.protocol.https.HttpsURLConnectionImpl
That "not serializable" made me think the error had something to do with a failure to transform something to a string, so I also tried this, but it did not change the error:
def msg = JsonOutput.toJson(dict).toString()
What is wrong with the refactored code that uses class URL, and what is the right way to use it to invoke the REST API?
For the life of me, I can't see what's different between the above and the linked Stack Overflow Q&A, and my inexperience with the language is such that I rely largely on adapting existing examples.
Solution
I would highly suggest you use the HTTP Request and the Pipeline Steps Utility plugin for this. You can then use those steps in a Groovy script as follows
// Calls a REST endpoint with a bearer token through the HTTP Request plugin
// and parses the JSON reply with readJSON (Pipeline Utility Steps).
node('master') {
  // Declared outside the closure: a `def` inside withCredentials { ... }
  // is local to that closure and is not visible to the checks below.
  def response
  withCredentials([string(credentialsId: cred_id, variable: 'auth_token')]) {
    response = httpRequest url: "https://jsonplaceholder.typicode.com/todos", customHeaders: [[name: 'Authorization', value: "Bearer ${auth_token}"]]
  }
  if( response.status != 200 ) {
    error("Service returned a ${response.status}")
  }
  def json = readJSON text: response.content
  println "The User ID is ${json[0]['userId']}"
  println "The follow json obj is ${json}"
}
Obviously you can modify the code if you want to build a method, and you will need to update with the appropriate URL.
I found a sucky and unsatisfying answer - but an answer nonetheless - that I posted here: https://stackoverflow.com/a/69486890/5437543
I hate that solution because it would appear to demonstrate that the Jenkins/Groovy language itself imposes an artificial contrivance on how I can organize my code. Effectively, I am prevented from doing
// Jenkinsfile
@Library('my-sandbox-libs@dev') my_lib
pipeline {
  agent any
  stages {
    stage( "1" ) { steps { script { my_lib.func() } } }
  }
}
// vars/my_lib.groovy
// Desired layout: the connection is created and the credential is applied
// inside one library function.
def func() {
  def req = new URL("https://whatever").openConnection();
  ...
  withCredentials([string(credentialsId: cred_id,
                          variable: 'auth_token')]) {
    req.setRequestProperty("Authorization", "Bearer ${auth_token}")
  }
  ...
}
...and I am forced to do
// Jenkinsfile
@Library('my-sandbox-libs@dev') my_lib
pipeline {
  agent any
  stages {
    stage( "1" ) { steps { script { my_lib.func(my_lib.getCred()) } } }
  }
}
// vars/my_lib.groovy
// Workaround layout: the token is read in its own function, so no connection
// object exists while the withCredentials step runs.
def getCred() {
  withCredentials([string(credentialsId: cred_id,
                          variable: 'auth_token')]) {
    return auth_token
  }
}
// Receives the token as a plain argument and uses it on the connection.
def func(auth_token) {
  def req = new URL("https://whatever").openConnection();
  ...
  req.setRequestProperty("Authorization", "Bearer ${auth_token}")
  ...
}
Extremely dissatisfying conclusion. I hope another answerer can point out a solution that doesn't rely on this contrived code organization.

Tags format on Packer ec2-ami deployment

I'm trying to create an Amazon EC2 AMI for the first time using HashiCorp Packer, but I'm getting the failure below on tag creation. I have already tried several variations of the format by trial and error, with no luck:
[ec2-boy-oh-boy@ip-172-168-99-23 pogi]$ packer init .
Error: Missing item separator
on variables.pkr.hcl line 28, in variable "tags":
27: default = [
28: "environment" : "testing"
Expected a comma to mark the beginning of the next item.
My code ec2.pkr.hcl looks like this:
[ec2-boy-oh-boy@ip-172-168-99-23 pogi]$ cat ec2.pkr.hcl
# Packer settings: declares the plugin this template depends on.
packer {
required_plugins {
# Amazon plugin, any version >= 0.0.2, from github.com/hashicorp/amazon.
amazon = {
version = ">= 0.0.2"
source = "github.com/hashicorp/amazon"
}
}
}
# EBS-backed AMI builder. The AMI name is "<ami_prefix>-<timestamp>"; network
# placement and tags come from the input variables.
source "amazon-ebs" "ec2" {
  ami_name           = "${var.ami_prefix}-${local.timestamp}"
  instance_type      = "t2.micro"
  region             = "us-east-1"
  # Plain references; wrapping a single value in "${...}" is unnecessary.
  vpc_id             = var.vpc
  subnet_id          = var.subnet
  security_group_ids = [var.sg]
  ssh_username       = "ec2-boy-oh-boy"

  # Base image: most recent image from the listed owner matching these filters.
  source_ami_filter {
    filters = {
      name                = "amzn2-ami-hvm-2.0*"
      root-device-type    = "ebs"
      virtualization-type = "hvm"
    }
    most_recent = true
    owners      = ["12345567896"]
  }

  # In HCL2 templates this is a nested block (repeat it per device),
  # not a list-of-objects attribute.
  launch_block_device_mappings {
    device_name           = "/dev/xvda"
    delete_on_termination = true
    volume_size           = 10
    volume_type           = "gp2"
  }

  # Both settings take a map of string tags.
  run_tags        = var.tags
  run_volume_tags = var.tags
}
# Build definition: runs the single amazon-ebs source named "ec2".
build {
sources = [
"source.amazon-ebs.ec2"
]
}
[ec2-boy-oh-boy@ip-172-168-99-23 pogi]$
Then my code variables.pkr.hcl looks like this:
[ec2-boy-oh-boy@ip-172-168-99-23 pogi]$ cat variables.pkr.hcl
# Current timestamp with "-", " ", "T", "Z" and ":" characters removed;
# used as the suffix of the AMI name.
locals {
timestamp = regex_replace(timestamp(), "[- TZ:]", "")
}
# Prefix of the AMI name (ami_name is "<ami_prefix>-<timestamp>").
variable "ami_prefix" {
type = string
default = "ec2-boy-oh-boy"
}
# Value passed to the builder's vpc_id.
variable "vpc" {
type = string
default = "vpc-asd957d"
}
# Value passed to the builder's subnet_id.
variable "subnet" {
type = string
default = "subnet-asd957d"
}
# Single security group id; wrapped in a list for security_group_ids.
variable "sg" {
type = string
default = "sg-asd957d"
}
# Tags passed to run_tags and run_volume_tags.
# NOTE(review): this is the block the reported "Missing item separator" error
# points at: the default opens with "[" (list syntax) but contains key = value
# pairs, while the declared type is map.
variable "tags" {
type = map
default = [
environment = "testing"
type = "none"
production = "later"
]
}
Your default value for the tags variable is of type list(string). Both the run_tags and run_volume_tags directives expect type map[string]string.
I was able to make the following changes to your variables file and run packer init successfully:
# Tags applied through run_tags / run_volume_tags: a map of string values,
# written with "{ }" (object syntax) rather than "[ ]".
variable "tags" {
  type = map(string)

  default = {
    environment = "testing"
    type        = "none"
    production  = "later"
  }
}

set shell mode for mongodb [duplicate]

I am using MongoDB Driver Java API to convert BSON to JSON.
I have test code like this.
// Parse a JSON string into a BSON Document and print it back using the
// default toJson() settings.
final String input = "{ \"timestamp\" : 1486064586641 }";
final org.bson.Document doc = org.bson.Document.parse(input);
System.out.println("input = " + input);
final String output = doc.toJson();
System.out.println("output = " + output);
The output is:
input = { "timestamp" : 1486064586641 }
output = { "timestamp" : { "$numberLong" : "1486064586641" } }
Is there an easy way to make the output look like the input?
BSON Document's toJson method only supports output to MongoDB Extended JSON (STRICT or SHELL format). If you want regular JSON, you can use the com.mongodb.util.JSON class:
// Print the same document in SHELL mode, STRICT mode and as plain JSON.
final String input = "{ \"timestamp\" : 1486064586641 }";
final org.bson.Document doc = org.bson.Document.parse(input);
final JsonWriterSettings shellSettings = new JsonWriterSettings(JsonMode.SHELL);
final JsonWriterSettings strictSettings = new JsonWriterSettings(JsonMode.STRICT);

System.out.println("input = " + input);
System.out.println("output (SHELL) = " + doc.toJson(shellSettings));
System.out.println("output (STRICT) = " + doc.toJson(strictSettings));
// com.mongodb.util.JSON serializes the document without a settings object.
System.out.println("output (JSON) = " + com.mongodb.util.JSON.serialize(doc));
This will generate following output:
input = { "timestamp" : 1486064586641 }
output (SHELL) = { "timestamp" : NumberLong("1486064586641") }
output (STRICT) = { "timestamp" : { "$numberLong" : "1486064586641" } }
output (JSON) = { "timestamp" : 1486064586641}
Natalja's answer is excellent, but if you are using the Mongo Java driver 3.8.2 upwards you will notice some deprecation warnings. If you want the output to look like the input you can use RELAXED JsonWriterSettings mode.
Below you can see an example with the possible modes and what the JSON will look like. The comments also mark the deprecated calls and their alternatives:
// Print the same document in every output mode, building the settings with
// JsonWriterSettings.builder() instead of the deprecated constructor.
final String input = "{ \"timestamp\" : 1486064586641 }";
final org.bson.Document doc = org.bson.Document.parse(input);
System.out.println("input = " + input);
System.out.println("output (SHELL) = "
        + doc.toJson(JsonWriterSettings.builder().outputMode(JsonMode.SHELL).build()));
// STRICT is deprecated - use EXTENDED (next statement) instead.
System.out.println("output (STRICT) = "
        + doc.toJson(JsonWriterSettings.builder().outputMode(JsonMode.STRICT).build()));
System.out.println("output (EXTENDED) = "
        + doc.toJson(JsonWriterSettings.builder().outputMode(JsonMode.EXTENDED).build()));
System.out.println("output (RELAXED) = "
        + doc.toJson(JsonWriterSettings.builder().outputMode(JsonMode.RELAXED).build()));
// com.mongodb.util.JSON is deprecated - use RELAXED (previous statement) instead.
System.out.println("output (JSON) = " + com.mongodb.util.JSON.serialize(doc));
Also note that the JsonWriterSettings constructor is deprecated and you can use as an alternative the builder method like e.g:
JsonWriterSettings.builder().outputMode(JsonMode.SHELL).build()

for each group by date in coffeescript

I have a function which pulls data from a URL and reformats it:
Promise = require "bluebird"
request = Promise.promisify require "request"
moment = require "moment"
cdn = require('config').server.cloudFrontDomain
toTitleCase = require "titlecase"
# Splits the text after the last '-' off remaining.value and returns it trimmed.
# remaining.value is shortened in place (the dash and the character before it
# are dropped). Returns '' and leaves remaining untouched when there is no dash.
parsePart = (remaining) ->
  value = remaining.value
  dashIndex = value.lastIndexOf '-'
  return '' if dashIndex is -1
  remaining.value = value.substring 0, dashIndex - 1
  value.substring(dashIndex + 1).trim()

# Fetches the stock list from `path` and resolves with one reformatted object
# per stock entry (id, name, colorUrl, colour, size, stock, inProduction, office).
# The promise from `request` is returned directly, so request and parse errors
# reject it without wrapping everything in `new Promise`.
exports.getStocks = (path) ->
  request(path).then (response) ->
    JSON.parse(response.body).map (stock) ->
      # stock.name looks like "<name> - <colour> - <size>"; peel parts off the end.
      remaining = value: stock.name
      size = parsePart remaining
      colour = parsePart remaining

      sku = stock.sku
      styleId = sku.split(/-/)[0]
      colorcode = /^(.*)-(.*)([0-9])$/.exec(sku)?[2]

      # One "<amount> due on <DD-MM-YYYY>" entry per preorder.
      dueLines = stock.preorders.map (preorder) ->
        "#{preorder.amount} due on #{moment(preorder.date).format('DD-MM-YYYY')}"

      {
        id: sku
        name: remaining.value
        colorUrl: "url(//#{cdn}/assets/product_shots/thumbs/#{styleId}-#{colorcode}.jpg)"
        colour: toTitleCase(colour.toLowerCase())
        size: size
        stock: stock.stock
        inProduction: dueLines.join ', '
        office: 'DE'
      }
where my data is like:
{
"id":1,
"stamp":"2014-09-25T12:55:30Z",
"name":" MENS T-SHIRT - BRIGHT BLUE - XS",
"sku":"SS01-BB0",
"stock":81,
"active":true,
"preorders":[
{
"id":92549,
"amount":160,
"date":"2016-06-19T22:00:00Z"
},
{
"id":92549,
"amount":200,
"date":"2016-06-19T22:00:00Z"
},
{
"id":92549,
"amount":1000,
"date":"2016-06-21T22:00:00Z"
}
],
"discountMatrix":0.0,
"stockNormalized":81,
"preOrdersSum":1360
},
{
"id":2,
"stamp":"2014-09-25T12:55:30Z",
"name":" MENS T-SHIRT - BRIGHT BLUE - S",
"sku":"SS01-BB1",
"stock":339,
"active":true,
"preorders":[
{
"id":92551,
"amount":240,
"date":"2016-06-19T22:00:00Z"
},
{
"id":92438,
"amount":160,
"date":"2016-06-19T22:00:00Z"
}
],
"discountMatrix":0.0,
"stockNormalized":339,
"preOrdersSum":400
},
what is the correct way to group each preorders quantity that is on the same date, so that instead of getting:
160 due on 19-06-2016, 200 due on 19-06-2016, 1000 due on 21-06-2016
i get 360 due on 19-06-2016, 1000 due on 21-06-2016
any advice much appreciated.
You could just use an object with the date as key and the total amount for the date as value.
For each preorder, add its amount at its date index in this object. At the end of the iteration, print the content of the object:
moment = require "moment"
data = [
{
id:1
stamp: "2014-09-25T12:55:30Z"
name: " MENS T-SHIRT - BRIGHT BLUE - XS"
sku: "SS01-BB0"
stock:81
active:true
preorders:[
{
id:92549
amount:160
date: "2016-06-19T22:00:00Z"
}
{
id:92549
amount:200
date: "2016-06-19T22:00:00Z"
}
{
id:92549
amount:1000
date: "2016-06-21T22:00:00Z"
}
]
discountMatrix:0.0
stockNormalized:81
preOrdersSum:1360
}
]
obj = {}
# Total preorder amount per calendar day. The key is the formatted day that is
# printed, so preorders on the same day are merged even if their times differ.
amountByDate = {}
# for each document in your data
for doc in data
  # for each preorder in your document
  for preorder in doc.preorders
    day = moment(preorder.date).format('DD-MM-YYYY')
    # start from 0 the first time a day is seen
    amountByDate[day] = (amountByDate[day] ? 0) + preorder.amount
# one "<total> due on <day>" entry per day, in first-seen order
obj.inProduction = ("#{amount} due on #{day}" for day, amount of amountByDate).join ', '
console.log obj.inProduction
Result:
360 due on 20-06-2016, 1000 due on 22-06-2016

Play Framework: How to read an entire configuration section consisting of unknown keys

Here below is how I'd like to configure security profiles for my Play application – each entry in auth.securityProfiles consists of an Operation => Roles pair:
auth {
securityProfiles {
myOperation1 = "author, auditor"
myOperation2 = "admin"
myOperationN = "auditor, default"
}
}
How do I read all the entries in section auth.securityProfiles to produce a Map like this?
val securityProfiles = Map(
"myOperation1" -> "author, auditor",
"myOperation2" -> "admin",
"myOperationN" -> "auditor, default"
)
Thanks.
Here is my solution... I've just modified the configuration like this...
auth {
securityProfiles = [
{
operation = "myOperation1"
roles = ["author", "auditor"]
}
{
operation = "myOperation2"
roles = ["admin"]
}
{
operation = "myOperationN"
roles = ["auditor", "default"]
}
]
}
... and then read it with the following code snippet:
import scala.collection.mutable.Map
// Operation -> roles lookup built from auth.securityProfiles.
// Operations that are not configured resolve to an empty role list.
var securityProfiles = Map[String, List[String]]().withDefaultValue(List.empty)
for {
  profiles  <- configuration.getConfigList("auth.securityProfiles")
  profile   <- profiles.toList
  operation <- profile.getString("operation")
} {
  // An entry without "roles" gets an empty list.
  val roles = profile.getStringList("roles").map(_.toList).getOrElse(List.empty)
  securityProfiles += (operation -> roles)
}
I hope that helps.