Make square bracket not appear on a new line? - perl

I am currently working with a .txt file containing data points extracted from certain files.
Since the files are pretty big, they are processed in smaller parts, but the output from that processing is not sorted in any order.
The data points are stored like this:
1_1_0_1_0_1_1_0_232 [
0 -19.72058 -18.89882 ]
1_0_0_0_0_0_0_0_0 [
-0.5940279 -1.949468 -1.185638 ]
1_0_1_1_0_1_1_1_100 [
-5.645662 -0.005585805 -6.196068 ]
1_0_1_1_0_1_1_1_101 [
-15.86037 -1.192093e-07 -18.77053 ]
1_0_1_1_0_1_1_1_102 [
-0.5648238 -1.970869 -1.230303 ]
1_0_1_1_1_0_1_0_103 [
-0.5750521 -1.946886 -1.222114 ]
1_0_1_1_1_0_1_0_104 [
-0.5926428 -1.941596 -1.191844 ]
1_0_1_1_1_0_1_0_105 [
-25.25665 0 -31.0921 ]
1_0_1_1_1_0_1_0_106 [
-0.001282441 -6.852591 -8.399776 ]
1_0_1_1_1_0_1_0_107 [
-0.0001649993 -8.857877 -10.69688 ]
1_0_1_1_1_0_1_0_108 [
-21.66693 0 -26.18516 ]
1_0_1_1_1_0_1_0_109 [
-5.444038 -0.004555213 -8.408965 ]
1_1_0_1_0_1_0_0_200 [
-4.023561 -0.01851013 -7.704897 ]
1_1_0_1_0_1_0_0_201 [
-0.443548 -3.057277 -1.167226 ]
1_1_0_1_0_1_0_0_202 [
-0.0001185011 -9.042104 -15.60585 ]
1_1_0_1_0_1_0_0_203 [
-5.960466e-07 -14.37778 -25.2224 ]
1_1_0_1_0_1_0_0_204 [
-0.5770675 -1.951139 -1.21623 ]
1_1_0_0_1_0_1_1_205 [
-0.5849463 -1.938798 -1.207353 ]
1_1_0_0_1_0_1_1_206 [
-0.5785673 -1.949474 -1.214192 ]
1_1_0_0_1_0_1_1_207 [
-27.21529 0 -32.21676 ]
1_1_0_0_1_0_1_1_208 [
-8.75938 -0.0001605878 -12.53627 ]
1_1_0_0_1_0_1_1_209 [
-1.281936 -0.3837854 -3.188763 ]
1_0_0_0_0_0_0_1_20 [
-0.2104172 -4.638866 -1.714325 ]
1_1_1_0_0_1_1_1_310 [
-11.71479 -9.298368e-06 -13.70222 ]
1_1_1_0_0_1_1_1_311 [
-24.71166 0 -30.45412 ]
1_1_1_0_0_1_1_1_312 [
-2.145031 -0.1357486 -4.617914 ]
1_1_1_0_0_1_1_1_313 [
-5.943637 -0.003112446 -7.630904 ]
1_1_1_0_0_1_1_1_314 [
0 -25.82314 -31.98673 ]
1_1_1_0_0_1_1_1_315 [
-8.178092e-05 -13.60563 -9.426649 ]
1_1_1_0_0_1_1_1_316 [
-0.00326875 -6.071715 -6.952539 ]
1_1_1_0_0_1_1_1_317 [
-17.92782 0 -24.64391 ]
1_1_1_0_0_1_1_1_318 [
-2.979753 -0.05447901 -6.11194 ]
1_1_1_0_0_1_1_1_319 [
-0.7661145 -1.118131 -1.568804 ]
1_0_0_0_0_0_0_1_31 [
-0.5749408 -1.961912 -1.215127 ]
1_0_0_0_0_0_0_0_10 [
-4.64927e-05 -9.977531 -20.60117 ]
1_0_1_1_1_1_0_1_120 [
-0.4925551 -1.135103 -2.694917 ]
1_0_1_1_1_1_0_1_131 [
-0.6127387 -1.958336 -1.148721 ]
1_1_0_0_0_0_0_1_142 [
-0.008494892 -6.882521 -4.901772 ]
1_1_0_0_0_1_1_1_153 [
0 -20.48085 -27.38916 ]
1_1_0_0_1_0_1_0_164 [
-0.5370184 -1.622399 -1.52286 ]
1_1_0_0_1_0_1_0_175 [
-24.08685 0 -29.42813 ]
1_1_0_0_1_1_1_0_186 [
-1.665665 -0.2307523 -4.074597 ]
1_0_0_0_0_0_0_0_1 [
-0.5880737 -1.945877 -1.198183 ]
1_1_0_0_1_0_1_1_210 [
-0.001396737 -6.574267 -21.30147 ]
1_1_0_1_0_1_1_0_221 [
-0.7456465 -1.893918 -0.980585 ]
1_0_0_0_0_0_1_1_42 [
-3.838613e-05 -10.23002 -13.01793 ]
1_0_0_0_0_0_1_1_43 [
-22.25132 0 -28.8467 ]
1_0_0_0_0_0_1_1_44 [
-6.688306 -0.001266626 -10.79875 ]
1_0_0_0_0_0_1_1_45 [
-0.429086 -2.197691 -1.436171 ]
1_0_0_0_0_0_1_1_46 [
-0.6683982 -1.928907 -1.072464 ]
1_0_0_0_1_0_0_1_47 [
-0.5767454 -1.972311 -1.206838 ]
1_0_0_0_1_0_0_1_48 [
-0.5789171 -1.965128 -1.206118 ]
1_0_0_0_1_0_0_1_49 [
-19.90514 0 -25.12686 ]
1_0_0_0_0_0_0_0_4 [
-4.768373e-07 -14.66496 -28.4888 ]
1_0_0_0_1_0_0_1_50 [
-0.01524216 -6.729354 -4.273614 ]
1_0_0_0_1_0_0_1_51 [
-3.576279e-07 -14.9054 -27.44406 ]
1_0_0_0_1_0_0_1_53 [
-0.003753785 -8.922103 -5.623135 ]
The format they are stored in is: <name>_<part> [ <data points> ]
I currently use a Perl script to sort the data points.
perl -n00e '
    while ( /([\d_]*)_(\d*) \s* \[ \s* (.*?) \s* \]/gmsx ) {
        ($name, $part, $datapoints) = ($1, $2, $3);
        $hash{$name}{$part} = $datapoints;
    }
    while ( ($key, $v) = each %hash ) {
        print "$key [\n", (
            map "${$v}{$_}\n", sort { $a <=> $b } keys %{$v}
        ), "]\n";
    }
'
This produces output like the following:
0_0_1_1_0_1_1_1 [
-0.5757762 -1.949812 -1.219321
-0.5732827 -1.974719 -1.212248
-0.005632018 -5.198827 -9.280998
-0.004484621 -7.180546 -5.595852
-1.776234e-05 -10.93515 -20.11548
-22.73301 0 -29.42717
-4.227753 -0.01532919 -7.374347
-3.396693 -0.05122549 -4.10732
-0.0008418526 -7.08029 -20.86733
-21.26725 0 -27.1029
-2.457597 -0.09611109 -5.11661
-5.492554 -0.00666456 -5.981491
-12.60927 -3.576285e-06 -15.31444
-0.5809742 -1.953598 -1.2077
-0.5807223 -1.969571 -1.200681
]
...
This is correct, except that the closing square bracket should not be on a new line;
it should be placed one space after the last data point.
The Perl script doesn't appear to print that newline explicitly,
so one of the commands must be adding it. Is it possible to prevent this?

Here is what @bytepusher recommended in the comments:
while ( ($key, $v) = each %hash ) {
    print "$key [\n",
          join( "\n", map "${$v}{$_}", sort { $a <=> $b } keys %{$v} ),
          " ]\n";
}
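For reference, here is the full one-liner with that change folded in (a sketch only; the parsing part is unchanged from the script above, and input.txt is just a placeholder name for the data file):

perl -n00e '
    while ( /([\d_]*)_(\d*) \s* \[ \s* (.*?) \s* \]/gmsx ) {
        ($name, $part, $datapoints) = ($1, $2, $3);
        $hash{$name}{$part} = $datapoints;
    }
    while ( ($key, $v) = each %hash ) {
        # join() puts "\n" only *between* the data points, so nothing is
        # printed after the last one except a space, the bracket and the final newline
        print "$key [\n",
              join("\n", map "${$v}{$_}", sort { $a <=> $b } keys %{$v}),
              " ]\n";
    }
' input.txt

With the sample data above, each group then ends with the closing bracket one space after the last data point instead of on a line of its own.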

Related

Group Neighbors by Outgoing Edge Type and Add Self to Result List

I want a result like this:
[
  {
    "vertex": [ the_vertex_itself ]
  },
  {
    "outgoingEdgeGroup1": [ list_of_outgoing_neighbors_with_edge_type_outgoingEdgeGroup1 ]
  },
  {
    "outgoingEdgeGroup2": [ list_of_outgoing_neighbors_with_edge_type_outgoingEdgeGroup2 ]
  }
]
I'm able to get this:
[
  {
    "outgoingEdgeGroup1": [ list_of_outgoing_neighbors_with_edge_type_outgoingEdgeGroup1 ]
  },
  {
    "outgoingEdgeGroup2": [ list_of_outgoing_neighbors_with_edge_type_outgoingEdgeGroup2 ]
  }
]
With the following query:
g.V('{unitId}').outE().group().by(label()).by(inV().fold())
But how would I append the target vertex itself?
One way is just to use a union. If you need a more complete key/value type of structure, that can also be created by adding project steps or nested group steps.
Using the air-routes data set:
gremlin> g.V(44).union(identity().values('city'),inE().group().by(label()).by(outV().fold())).fold()
==>[Santa Fe,[contains:[v[3742],v[3728]],route:[v[13],v[31],v[20],v[8]]]]

Fill Multidimensional Array with two variables in loop

I am new to Perl and I am trying to get the following result in a loop:
# ResultFirstStep
$VAR1 = [
    [
        'Hello1'
    ],
    [
        'Hello2'
    ],
    [
        'Hello3'
    ],
];
But if I use a reference to the inner array, \@InnerArray:
# Example1
my @OuterArray;
my @InnerArray;
foreach(1,2,3)
{
    @InnerArray[0] = "Hello" . $_;
    push(@OuterArray, \@InnerArray);
}
print Dumper \@OuterArray;
... I get this result:
$VAR1 = [
    [
        'Hello3'
    ],
    $VAR1->[0],
    $VAR1->[0]
];
If I try it without the reference:
# Example2
my @OuterArray;
my @InnerArray;
foreach(1,2,3)
{
    @InnerArray[0] = "Hello" . $_;
    push(@OuterArray, @InnerArray);
}
print Dumper \@OuterArray;
... I get this result:
$VAR1 = [
    'Hello1',
    'Hello2',
    'Hello3'
];
But what I want is the result shown at the beginning (ResultFirstStep) and, at some later point, the following result (ResultFinally):
# ResultFinally
$VAR1 = [
    [
        'Hello1',
        [
            [],
            []
        ]
    ],
    [
        'Hello2',
        [
            [],
            []
        ]
    ],
    [
        'Hello3',
        [
            [],
            []
        ]
    ],
];
So the questions are:
How do I get the ResultFirstStep result?
Can I solve the ResultFinally problem with Perl?
Can someone please help me? I don't see the mistake.
Just use anonymous arrays:
my @outer;
push @outer, [ "Hello$_", [ [], [] ] ] for 1 .. 3;
or even
my @outer = map [ "Hello$_", [ [], [] ] ], 1 .. 3;
If you want to use the inner array, declare it inside the loop, otherwise you're reusing the same array again and again:
my @outer;
for (1 .. 3) {
    my @inner = ( "Hello$_", [ [], [] ] );
    push @outer, \@inner;
}
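For completeness, a small self-contained sketch based on the answer above (the Hello values and the nested [ [], [] ] structure come straight from the question) that prints both target structures with Data::Dumper:

#!/usr/bin/perl
use strict;
use warnings;
use Data::Dumper;

# ResultFirstStep: one fresh anonymous inner array per iteration
my @outer_first;
push @outer_first, [ "Hello$_" ] for 1 .. 3;
print Dumper \@outer_first;    # [ [ 'Hello1' ], [ 'Hello2' ], [ 'Hello3' ] ]

# ResultFinally: each element also carries the nested [ [], [] ] structure
my @outer_final = map [ "Hello$_", [ [], [] ] ], 1 .. 3;
print Dumper \@outer_final;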

How to fix geojson to satisfy the needs of a mongodb 2dsphere index

I have ~400K documents in a mongo collection, all with geometry of type:Polygon. It is not possible to add a 2dsphere index to the data as it currently stands because the geometry apparently has self-intersections.
In the past we had a hacky workaround which was to compute the bounding box of the geometry on a mongoose save hook and then index that rather than the geometry itself, but we would like to simplify things and just use the actual geometry.
So far I have tried using turf as follows (this is the body of a function called fix):
let geom = turf.polygon(geometry.coordinates);
geom = turf.simplify(geom, { tolerance: 1e-7 });
geom = turf.cleanCoords(geom);
geom = turf.unkinkPolygon(geom);
geom = turf.combine(geom);
return geom.features[0].geometry;
The most important function there is unkinkPolygon, which I hoped would do exactly what I wanted, i.e. make the geometry nice enough to be indexed. The simplify is possibly not helpful, but I added it for good measure. The clean is there because unkink complained about its input, and the combine is there to turn an array of Polygons into a single MultiPolygon. Actually, unkink still wasn't happy with its inputs, so I had to write the hacky function below that jitters duplicated vertices; it modifies the geom before it is passed to unkink:
function jitterDups(geom) {
    let coords = geom.geometry.coordinates;
    let points = new Set();
    for (let ii = 0; ii < coords.length; ii++) {
        // last coords is allowed to match first, not sure if it must match.
        let endsMatch = coords[ii][0].join(",") === coords[ii][coords[ii].length - 1].join(",");
        for (let jj = 0; jj < coords[ii].length - (endsMatch ? 1 : 0); jj++) {
            let str = coords[ii][jj].join(",");
            while (points.has(str)) {
                coords[ii][jj][0] += 1e-8; // if you make this too small it doesn't do the job
                if (jj === 0 && endsMatch) {
                    coords[ii][coords[ii].length - 1][0] = coords[ii][jj][0];
                }
                str = coords[ii][jj].join(",");
            }
            points.add(str);
        }
    }
}
However, even after all of that mongo still complains.
Here is some sample raw Polygon input:
{ type: "Polygon", coordinates: [ [ [ -0.027542009179339, 51.5122867222457 ], [ -0.027535822940572, 51.512281465421 ], [ -0.027535925691804, 51.5122814221859 ], [ -0.027589474043984, 51.5122605515771 ], [ -0.027638484531731, 51.5122996934574 ], [ -0.027682911101528, 51.5123351881505 ], [ -0.027689915350493, 51.5123872384419 ], [ -0.027672409315982, 51.5123868001613 ], [ -0.027667905522642, 51.5123866344944 ], [ -0.027663068941865, 51.5123864992013 ], [ -0.02764931654289, 51.512375566682 ], [ -0.027552504539425, 51.5122983194123 ], [ -0.027542009179339, 51.5122867222457 ] ], [ [ -0.027542009179339, 51.5122867222457 ], [ -0.027557948301911, 51.5122984109658 ], [ -0.027560309178214, 51.5123001412876 ], [ -0.027542009179339, 51.5122867222457 ] ] ] }
And that same data after it has passed through the above fixing pipeline:
{ type: "MultiPolygon", coordinates: [ [ [ [ -0.027560309178214, 51.5123001412876 ], [ -0.02754202882236209, 51.51228674396312 ], [ -0.027542009179339, 51.5122867222457 ], [ -0.027535822940572, 51.512281465421 ], [ -0.027589474043984, 51.5122605515771 ], [ -0.027682911101528, 51.5123351881505 ], [ -0.027689915350493, 51.5123872384419 ], [ -0.027663068941865, 51.5123864992013 ], [ -0.027552504539425, 51.5122983194123 ], [ -0.02754202884162257, 51.51228674398443 ], [ -0.027557948301911, 51.5122984109658 ], [ -0.027560309178214, 51.5123001412876 ] ] ], [ [ [ -0.02754202884162257, 51.51228674398443 ], [ -0.02754202882236209, 51.51228674396312 ], [ -0.027541999179339, 51.5122867222457 ], [ -0.02754202884162257, 51.51228674398443 ] ] ] ] }
And here is the relevant bit of the error that is spat out by the index creation:
Edges 0 and 9 cross.
Edge locations in degrees: [-0.0275603, 51.5123001]-[-0.0275420, 51.5122867] and [-0.0275420, 51.5122867]-[-0.0275579, 51.5122984]
"code" : 16755,
"codeName" : "Location16755"
My question is: is there a bug in turf, or is it just not doing what I need here in terms of keeping mongo happy? Also, is there any documentation on exactly what the 2dsphere index needs in terms of "fixing"? And does anyone have suggestions for other tools I might use to fix the data, e.g. mapshaper or PostGIS's ST_MakeValid?
Note that once the existing data is fixed I also need a solution for fixing new data on the fly (ideally something that works nice with node).
Mongo Version: 3.4.14 (or any later 3.x)
The problem here is not that the polygon is intersecting itself, but rather that you have a (tiny) hole in the polygon, composed of 4 points, which shares a point with the exterior. So the hole "touches" the exterior rather than intersecting it, but even that is not allowed.
You can fix such cases using Shapely buffer with a tiny value, e.g.:
shp = shapely.geometry.shape({ "type": "Polygon", "coordinates": [ [ [ -0.027542009179339, 51.5122867222457 ], [ -0.027535822940572, 51.512281465421 ], [ -0.027535925691804, 51.5122814221859 ], [ -0.027589474043984, 51.5122605515771 ], [ -0.027638484531731, 51.5122996934574 ], [ -0.027682911101528, 51.5123351881505 ], [ -0.027689915350493, 51.5123872384419 ], [ -0.027672409315982, 51.5123868001613 ], [ -0.027667905522642, 51.5123866344944 ], [ -0.027663068941865, 51.5123864992013 ], [ -0.02764931654289, 51.512375566682 ], [ -0.027552504539425, 51.5122983194123 ], [ -0.027542009179339, 51.5122867222457 ] ], [ [ -0.027542009179339, 51.5122867222457 ], [ -0.027557948301911, 51.5122984109658 ], [ -0.027560309178214, 51.5123001412876 ], [ -0.027542009179339, 51.5122867222457 ] ] ] })
shp = shp.buffer(1e-12, resolution=0)
geojson = shapely.geometry.mapping(shp)

CouchDB View - Filter and Group By on Key Array

Description of Problem
I have an array of keys in a CouchDB view, [doc.time, doc.address]. Neither is unique. doc.time is a UNIX timestamp and doc.address is a string. The reduce function is set to _sum as the only value for each set of keys is a number.
What I want is to filter by doc.time, then group the remaining records by doc.address. If I put doc.time as the first key, I cannot seem to group by unique addresses no matter what I specify as a group_level. If I put doc.address first, I cannot seem to filter the query by time.
Two Examples
Query: ?group_level=1&startkey=[0,1230000000]&endkey=[{},1340000000]
First Key: doc.address before doc.time
Problem: Does not filter by time
Code:
rows: [
    {
        key: [ "1126GDuGLQTX3LFHHmjCctdn8WKDjn7QNA" ],
        value: 50
    },
    {
        key: [ "112AobLhjLJQ3LGqXFrsdnWMPqWCQqoiS6" ],
        value: 50
    }
]
Query: ?group_level=1&startkey=[1230000000]&endkey=[1340000000,{}]
First Key: doc.time before doc.address
Problem: Cannot see the addresses, and the results are not grouped by doc.address
Code:
rows: [
    {
        key: [ 1231469665 ],
        value: 50
    },
    {
        key: [ 1231469744 ],
        value: 50
    }
]
You mentioned that:
... If I put doc.time as the first key, I cannot seem to group by unique addresses no matter what I specify as a group_level ...
The query parameter group_level=N splits the string on the Nth comma and groups the left elements together by string match. Therefore, when your array key is like this: [doc.time, doc.address], you won't be able to group by address, which is not on the left side of the comma.
... If I put doc.address first, I cannot seem to filter the query by time ...
When your array key is like [doc.address, doc.time], notice that you are emitting an array key inside your map function. You need to consider the following points regarding array (compound) keys in CouchDB, described in this reference:
... First thing of note and very important ... an array output ... from the javascript Map function ... each of those Index Keys are strings, and are ordered character by character as strings, including the brackets and commas ...
The above statement and the explanations in that reference have a significant impact on how CouchDB indexing works in the case of compound (array) keys.
To clarify, let's create documents like the following in a sample database:
{"time":"2011","address":"CT"}
{"time":"2012","address":"CT"}
...
{"time":"2011","address":"TX"}
...
{"time":"2015","address":"TX"}
...
{"time":"2014","address":"NY"}
...
{"time":"2014","address":"CA"}
{"time":"2015","address":"CA"}
{"time":"2016","address":"CA"}
I implemented a view map function like this:
function (doc) {
    if (doc.time && doc.address) {
        emit([doc.address, doc.time], null);
    }
}
For now I'm not using any reduce function; let's ignore any grouping or reducing and focus on plain, simple indexing. The above view generates the following key/value pairs for the index:
$ curl -k -X GET 'https://admin:****@192.168.1.106:6984/sample/_design/by_addr_time/_view/by_addr_time'
{"total_rows":25,"offset":0,"rows":[
{"id":"doc_0022","key":["CA","2014"],"value":null},
{"id":"doc_0023","key":["CA","2015"],"value":null},
{"id":"doc_0024","key":["CA","2016"],"value":null},
{"id":"doc_0000","key":["CT","2011"],"value":null},
{"id":"doc_0001","key":["CT","2012"],"value":null},
{"id":"doc_0002","key":["CT","2013"],"value":null},
{"id":"doc_0003","key":["CT","2014"],"value":null},
{"id":"doc_0004","key":["CT","2015"],"value":null},
{"id":"doc_0005","key":["CT","2016"],"value":null},
{"id":"doc_0014","key":["NY","2011"],"value":null},
{"id":"doc_0015","key":["NY","2012"],"value":null},
{"id":"doc_0016","key":["NY","2013"],"value":null},
{"id":"doc_0017","key":["NY","2014"],"value":null},
{"id":"doc_0018","key":["NY","2015"],"value":null},
{"id":"doc_0019","key":["NY","2016"],"value":null},
{"id":"doc_0020","key":["NY","2017"],"value":null},
{"id":"doc_0021","key":["NY","2018"],"value":null},
{"id":"doc_0006","key":["TX","2011"],"value":null},
{"id":"doc_0008","key":["TX","2012"],"value":null},
{"id":"doc_0007","key":["TX","2013"],"value":null},
{"id":"doc_0009","key":["TX","2014"],"value":null},
{"id":"doc_0010","key":["TX","2015"],"value":null},
{"id":"doc_0011","key":["TX","2016"],"value":null},
{"id":"doc_0012","key":["TX","2017"],"value":null},
{"id":"doc_0013","key":["TX","2018"],"value":null}
]}
Now, I'm going to do a query to filter the view by doc.time. My query parameters are:
?startkey=["AA","2017"]&endkey=["ZZ","2018"]
I expect the above query to return only the docs with the time field between 2017 and 2018; the address field of those docs can have any value, since I specified from AA to ZZ, which covers all addresses in my database. I'm doing the query with curl like this:
$ curl -k -X GET 'https://admin:****@192.168.1.106:6984/sample/_design/by_addr_time/_view/by_addr_time?startkey=\["AA","2017"\]&endkey=\["ZZ","2018"\]'
{"total_rows":25,"offset":0,"rows":[
{"id":"doc_0022","key":["CA","2014"],"value":null},
{"id":"doc_0023","key":["CA","2015"],"value":null},
{"id":"doc_0024","key":["CA","2016"],"value":null},
{"id":"doc_0000","key":["CT","2011"],"value":null},
{"id":"doc_0001","key":["CT","2012"],"value":null},
{"id":"doc_0002","key":["CT","2013"],"value":null},
{"id":"doc_0003","key":["CT","2014"],"value":null},
{"id":"doc_0004","key":["CT","2015"],"value":null},
{"id":"doc_0005","key":["CT","2016"],"value":null},
{"id":"doc_0014","key":["NY","2011"],"value":null},
{"id":"doc_0015","key":["NY","2012"],"value":null},
{"id":"doc_0016","key":["NY","2013"],"value":null},
{"id":"doc_0017","key":["NY","2014"],"value":null},
{"id":"doc_0018","key":["NY","2015"],"value":null},
{"id":"doc_0019","key":["NY","2016"],"value":null},
{"id":"doc_0020","key":["NY","2017"],"value":null},
{"id":"doc_0021","key":["NY","2018"],"value":null},
{"id":"doc_0006","key":["TX","2011"],"value":null},
{"id":"doc_0008","key":["TX","2012"],"value":null},
{"id":"doc_0007","key":["TX","2013"],"value":null},
{"id":"doc_0009","key":["TX","2014"],"value":null},
{"id":"doc_0010","key":["TX","2015"],"value":null},
{"id":"doc_0011","key":["TX","2016"],"value":null},
{"id":"doc_0012","key":["TX","2017"],"value":null},
{"id":"doc_0013","key":["TX","2018"],"value":null}
]}
The response returned by the above query may seem shocking, because it did NOT return only the docs with the time field between 2017 and 2018. That's just how CouchDB indexing for array keys works: CouchDB indexes array keys as if the whole array were a string, including the brackets and commas! If you read the reference, it starts to make sense.
Now let's change the query:
?startkey=["CT","2016"]&endkey=["TX","2011"]
The result of the above query is shown below; based on the explanation above, this should make sense:
$ curl -k -X GET 'https://admin:****@192.168.1.106:6984/sample/_design/by_addr_time/_view/by_addr_time?startkey=\["CT","2016"\]&endkey=\["TX","2011"\]'
{"total_rows":25,"offset":8,"rows":[
{"id":"doc_0005","key":["CT","2016"],"value":null},
{"id":"doc_0014","key":["NY","2011"],"value":null},
{"id":"doc_0015","key":["NY","2012"],"value":null},
{"id":"doc_0016","key":["NY","2013"],"value":null},
{"id":"doc_0017","key":["NY","2014"],"value":null},
{"id":"doc_0018","key":["NY","2015"],"value":null},
{"id":"doc_0019","key":["NY","2016"],"value":null},
{"id":"doc_0020","key":["NY","2017"],"value":null},
{"id":"doc_0021","key":["NY","2018"],"value":null},
{"id":"doc_0006","key":["TX","2011"],"value":null}
]}
UPDATE
... What I want is to filter by doc.time, then group the remaining records by doc.address ...
So, what should we do? There is a good question and answer that provides the basic ideas.
I'm not sure which idea is the best, but I implemented one of them like this: I created a view named t_red, shown below, with a builtin _count reduce:
function (doc) {
    if (doc.time && doc.address) {
        emit([doc.time, doc.address], null);
    }
}
Also, I created a view named a_red with a builtin _count reduce:
function (doc) {
    if (doc.address && doc.time) {
        emit([doc.address, doc.time], null);
    }
}
Then I developed the following Node.js code to query doc.time between 2012 and 2015 and then group the results by doc.address; the console logs are shown inside the code as comments. I hope this code is helpful (not confusing!):
process.env.NODE_TLS_REJECT_UNAUTHORIZED = "0"; // Ignore rejection, because the CouchDB SSL certificate is self-signed
const fetch = require('node-fetch')
// query the "t_red" view/index
fetch(`https://admin:****@192.168.1.106:6984/sample/_design/t_red/_view/t_red?group_level=2&startkey=["2012", "AA"]&endkey=["2015", "ZZ"]`, {
    method: 'GET',
    headers: {
        'Content-Type': 'application/json',
    }
}).then(
    res => res.json()
).then(data => {
    let unique_addr = []
    data.rows.map(row => {
        console.log('row.key-> ', row.key, ' row.value-> ', row.value)
        // console log is shown below:
        //
        // row.key-> [ '2012', 'CT' ] row.value-> 1
        // row.key-> [ '2012', 'NY' ] row.value-> 1
        // row.key-> [ '2012', 'TX' ] row.value-> 1
        // row.key-> [ '2013', 'CT' ] row.value-> 1
        // row.key-> [ '2013', 'NY' ] row.value-> 1
        // row.key-> [ '2013', 'TX' ] row.value-> 1
        // row.key-> [ '2014', 'CA' ] row.value-> 1
        // row.key-> [ '2014', 'CT' ] row.value-> 1
        // row.key-> [ '2014', 'NY' ] row.value-> 1
        // row.key-> [ '2014', 'TX' ] row.value-> 1
        // row.key-> [ '2015', 'CA' ] row.value-> 1
        // row.key-> [ '2015', 'CT' ] row.value-> 1
        // row.key-> [ '2015', 'NY' ] row.value-> 1
        // row.key-> [ '2015', 'TX' ] row.value-> 1
        if (unique_addr.indexOf(row.key[1]) == -1) { // Push unique addresses into an array
            unique_addr.push(row.key[1])
        }
    })
    console.log(unique_addr)
    // Console log is shown below:
    //
    // [ 'CT', 'NY', 'TX', 'CA' ]
    return unique_addr
}).then(unique_addr => {
    // Group the unique addresses
    let group_by_address = unique_addr.map(addr => {
        // For each unique address, do a query of the "a_red" view/index
        return fetch(`https://admin:****@192.168.1.106:6984/sample/_design/a_red/_view/a_red?group_level=2&startkey=["${addr}","2012"]&endkey=["${addr}","2015"]`, {
            method: 'GET',
            headers: {
                'Content-Type': 'application/json',
            }
        }).then(
            res => res.json()
        ).then(data => {
            data.rows.map(row => { console.log('row.key-> ', row.key, ' row.value-> ', row.value) })
            // Console logs related to this section of code are shown below:
            // row.key-> [ 'CA', '2014' ] row.value-> 1
            // row.key-> [ 'CA', '2015' ] row.value-> 1
            // row.key-> [ 'NY', '2012' ] row.value-> 1
            // row.key-> [ 'NY', '2013' ] row.value-> 1
            // row.key-> [ 'NY', '2014' ] row.value-> 1
            // row.key-> [ 'NY', '2015' ] row.value-> 1
            // row.key-> [ 'CT', '2012' ] row.value-> 1
            // row.key-> [ 'CT', '2013' ] row.value-> 1
            // row.key-> [ 'CT', '2014' ] row.value-> 1
            // row.key-> [ 'CT', '2015' ] row.value-> 1
            // row.key-> [ 'TX', '2012' ] row.value-> 1
            // row.key-> [ 'TX', '2013' ] row.value-> 1
            // row.key-> [ 'TX', '2014' ] row.value-> 1
            // row.key-> [ 'TX', '2015' ] row.value-> 1
            let obj = {}
            obj[addr] = data.rows.length // This object contains the unique address and its frequency in the above query
            return obj
        }).catch(err => {
            console.log('err-> ', err)
        })
    })
    return group_by_address
}).then(group_by_address => {
    group_by_address.map(group => {
        group.then(() => {
            console.log('Grouped by address-> ', group)
            // Console logs related to this section of code are shown below:
            // Grouped by address-> Promise { { CA: 2 } }
            // Grouped by address-> Promise { { NY: 4 } }
            // Grouped by address-> Promise { { CT: 4 } }
            // Grouped by address-> Promise { { TX: 4 } }
        })
    })
}).catch(err => {
    console.log('err-> ', err)
})
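Since the main question on this page is Perl-based, here is a rough Perl sketch of just the first query above (an untested outline that assumes the same sample database, the t_red view and the placeholder credentials from the curl examples; HTTP::Tiny and JSON::PP ship with core Perl, but https also requires IO::Socket::SSL):

use strict;
use warnings;
use HTTP::Tiny;
use JSON::PP qw(decode_json);

# Host, database and the **** password are the placeholders used in the curl examples.
my $http = HTTP::Tiny->new(verify_SSL => 0);    # self-signed certificate, as above
my $base = 'https://admin:****@192.168.1.106:6984/sample';

# Filter by doc.time (2012..2015) via the t_red view, grouped at level 2.
my $qs = $http->www_form_urlencode({
    group_level => 2,
    startkey    => '["2012","AA"]',
    endkey      => '["2015","ZZ"]',
});
my $res = $http->get("$base/_design/t_red/_view/t_red?$qs");
die "query failed: $res->{status}" unless $res->{success};

# Collect the unique addresses (second element of each compound key).
my %seen;
my @unique_addr = grep { !$seen{$_}++ }
                  map  { $_->{key}[1] } @{ decode_json($res->{content})->{rows} };
print "@unique_addr\n";    # e.g. CT NY TX CA

The per-address step then mirrors the second half of the JavaScript: one a_red query per unique address, with startkey ["$addr","2012"] and endkey ["$addr","2015"].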

Limitations on mongodb Polygon

I'm trying to insert a Polygon into a MongoDB document:
polygons: {
    type: "Polygon",
    coordinates: [
        [
            [ -104.6093679, 50.449794 ],
            [ -104.6093679, 50.449794 ],
            [ -104.6093863, 50.449794 ],
            [ -104.6093863, 50.449794 ],
            [ -104.6093679, 50.449794 ]
        ]
    ]
}
But MongoDB throws this exception: "Can't extract geo keys from object, malformed geometry?" I also checked the correctness of the index and verified that, with the same structure but different data, the insert works fine. My question is: are there limitations on the Polygon area that MongoDB can manage?