Convert type from struct table to base.FixedDataGrid in GO - mongodb

i'm having a trouble converting my struct table to fixedDataGrid, because i need my data to be a fixedDataGrid so that i can use machine learning methods from GoLearn lib.
My struct is like this:
type dataStruct struct{
Sepal_length string
Sepal_width string
Petal_length string
Petal_width string
Species string
}
So when i get my data from my mongo db, i get them like this:
var results []dataStruct
err := col.Find(nil).All(&results)
Is there a way to convert my "results" from []dataStruct type to base.FixedDataGrid ??
CreateModel function:
func CreateModel(c echo.Context) error {
fmt.Println("====> Entry CreateModel function");
//var results []dataStruct
var Success bool = false
Db := db.MgoDb{}
Db.Init()
defer Db.Close()
col := Db.C(db.TrainingDataCollection)
var results dataStruct
if err := col.Find(nil).All(results); err != nil {
fmt.Println("ERROR WHILE GETTING THE TRAINING DATA")
} else {
//fmt.Println("Results All: ", results)
Success = true
}
fmt.Println("=============",results)
//Initialises a new KNN classifier
cls := knn.NewKnnClassifier("euclidean", "linear", 2)
//Do a training-test split
trainData, testData := base.InstancesTrainTestSplit(results, 0.55)
cls.Fit(trainData)
//Calculates the Euclidean distance and returns the most popular label
predictions, err := cls.Predict(testData)
if err != nil {
panic(err)
}
fmt.Println(predictions)
// Prints precision/recall metrics
confusionMat, err := evaluation.GetConfusionMatrix(testData, predictions)
if err != nil {
panic(fmt.Sprintf("Unable to get confusion matrix: %s", err.Error()))
}
fmt.Println(evaluation.GetSummary(confusionMat))
return c.JSON(http.StatusOK, Success)
}
Thank you in advance for your help !

Here is how i solved the issue: Actually there is a function InstancesFromMat64(row int, col int, matrix) than creates instances from a float64 matrix, and this is what i used:
func CreateModel(c echo.Context) error {
fmt.Println("====> Entry CreateModel function");
var Success bool = false
Db := db.MgoDb{}
Db.Init()
defer Db.Close()
col := Db.C(db.TrainingDataCollection)
var results dataStruct
if err := col.Find(nil).All(&results); err != nil {
fmt.Println("ERROR WHILE GETTING THE TRAINING DATA")
} else {
Success = true
}
Data := make([]float64, len(results*nbAttrs)
/**** Filling the Data var with my dataset data *****/
mat := mat64.NewDense(row,nbAttrs,Data)
inst := base.InstancesFromMat64(row,nbAttrs,mat)
//Selecting the class attribute for our instance
attrs := inst.AllAttributes()
inst.AddClassAttribute(attrs[4])
//Initialise a new KNN classifier
cls := knn.NewKnnClassifier("manhattan","linear",3)
//Training-tessting split
trainData, testData := base.InstancesTrainTestSplit(inst,0.7)
/******* Continue the Model creation ******/
I'll be glad if my answer helps someone.
Thanks a lot #mkopriva for your help !

base.FixedDataGrid is an interface, so what you need to do is to implement that interface, that is, implement all of its methods, on the type you want to use as FixedDataGrid.
Since you want to use []dataStruct, a slice of dataStructs, which is an unnamed type, as FixedDataGrid you will have to declare a new type to be able to add methods to it because you can add methods only to named types. For example something like this:
type dataStructList []dataStruct
Now, if you take a look at the documentation, you can see that the FixedDataGrid interface declares two methods RowString and Size but also embeds another interface, the base.DataGrid interface, which means you need to implement the methods declared by DataGrid as well. So, given your new dataStructList type, you can do something like this:
func (l dataStructList) RowString(int) string { /* ... */ }
func (l dataStructList) Size() (int, int) { /* ... */ }
func (l dataStructList) GetAttribute(base.Attribute) (base.AttributeSpec, error) { /* ... */ }
func (l dataStructList) AllAttributes() []base.Attribute { /* ... */ }
func (l dataStructList) AddClassAttribute(base.Attribute) error { /* ... */ }
func (l dataStructList) RemoveClassAttribute(base.Attribute) error { /* ... */ }
func (l dataStructList) AllClassAttributes() []base.Attribute { /* ... */ }
func (l dataStructList) Get(base.AttributeSpec, int) []byte { /* ... */ }
func (l dataStructList) MapOverRows([]base.AttributeSpec, func([][]byte, int) (bool, error)) error { /* ... */ }
After you've implemented the /* ... */ parts you can then start using dataStructList as a FixedDataGrid, so something like this:
var results []dataStruct
err := col.Find(nil).All(&results)
fdg := dataStructList(results) // you can use fdg as FixedDataGrid
Or
var results dataStructList // you can use results as FixedDataGrid
err := col.Find(nil).All(&results)
Update:
After you've implemented all of those methods on the dataStructList all you need is the type of the results variable inside your function:
func CreateModel(c echo.Context) error {
fmt.Println("====> Entry CreateModel function")
//var results []dataStruct
var Success bool = false
Db := db.MgoDb{}
Db.Init()
defer Db.Close()
col := Db.C(db.TrainingDataCollection)
var results dataStructList // <--- use the type that implements the interface
if err := col.Find(nil).All(&results); err != nil { // <-- pass a pointer to results
fmt.Println("ERROR WHILE GETTING THE TRAINING DATA")
} else {
//fmt.Println("Results All: ", results)
Success = true
}
fmt.Println("=============", results)
//Initialises a new KNN classifier
cls := knn.NewKnnClassifier("euclidean", "linear", 2)
//Do a training-test split
trainData, testData := base.InstancesTrainTestSplit(results, 0.55) // <-- this will work because results if of type dataStructList, which implements the base.FixedDataGrid interface.
cls.Fit(trainData)
//Calculates the Euclidean distance and returns the most popular label
predictions, err := cls.Predict(testData)
if err != nil {
panic(err)
}
fmt.Println(predictions)
// Prints precision/recall metrics
confusionMat, err := evaluation.GetConfusionMatrix(testData, predictions)
if err != nil {
panic(fmt.Sprintf("Unable to get confusion matrix: %s", err.Error()))
}
fmt.Println(evaluation.GetSummary(confusionMat))
return c.JSON(http.StatusOK, Success)
}

Related

Passing in dynamic struct into function in golang

I having a couple of structs, Products and Categories. I have 2 functions listed below that have identical logic just different structs that are being used and returned. Is there anyway I can abstract out the struct data types and use the same logic in a single function called GetObjects?
func GetCategories(collection *mongo.Collection) []Category {
ctx := context.Background()
cats := []Category{}
cur, err := collection.Find(ctx, bson.M{})
if err != nil {
log.Fatal("Error: ", err)
}
for cur.Next(context.TODO()) {
var cat Category
err = cur.Decode(&cat)
if err != nil {
log.Fatal(err)
}
cats = append(cats, cat)
}
return cats
}
func GetProducts(collection *mongo.Collection) []Product {
ctx := context.Background()
prods := []Product{}
cur, err := collection.Find(ctx, bson.M{})
if err != nil {
log.Fatal("Error: ", err)
}
for cur.Next(context.TODO()) {
var prod Product
err = cur.Decode(&prod)
if err != nil {
log.Fatal(err)
}
prods = append(prods, prod)
}
return prods
}
You could create a generalized GetObjs() if you would pass in the destination where you want the results to be loaded.
Something like:
func GetObjs(c *mongo.Collection, dst interface{})
And callers are responsible to pass a ready slice or a pointer to a slice variable where results will be stored.
Also note that context.Context should be passed and not created arbitrarily inside the function:
func GetObjs(ctx context.Context, c *mongo.Collection, dst interface{})
Also, errors should be returned and not "swallowed", so if one occurs, it can be dealt with appropriately at the caller:
func GetObjs(ctx context.Context, c *mongo.Collection, dst interface{}) error
Also, if you need all results, you don't need to iterate over them and decode all one-by-one. Just use Cursor.All().
This is how the "improved" GetObjs() could look like:
func GetObjs(ctx context.Context, c *mongo.Collection, dst interface{}) error {
cur, err := c.Find(ctx, bson.M{})
if err != nil {
return err
}
return cur.All(ctx, dst)
}
(Although this became quite simple, not sure it warrants its own existence.)
And this is how you could use it:
ctx := ... // Obtain context, e.g. from the request: r.Context()
c := ... // Collection you want to query from
var cats []Category
if err := GetObjs(ctx, c, &cats); err != nil {
// Handle error
return
}
// Use cats
var prods []Product
if err := GetObjs(ctx, c, &prods); err != nil {
// Handle error
return
}
// Use prods

How to marshal/unmarshal bson array with polymorphic struct with mongo-go-driver

I'm struggling to marshal/unmarshal bson array with polymorphic struct with mongo-go-driver。I plan to save a struct discriminator into the marshaled data, and write a custom UnmarshalBSONValue function to decode it according to the struct discriminator. But I don't know how to do it correctly.
package polymorphism
import (
"fmt"
"testing"
"code.byted.org/gopkg/pkg/testing/assert"
"go.mongodb.org/mongo-driver/bson"
"go.mongodb.org/mongo-driver/bson/bsontype"
)
type INode interface {
GetName() string
}
type TypedNode struct {
NodeClass string
}
type Node struct {
TypedNode `bson:"inline"`
Name string
Children INodeList
}
func (n *Node) GetName() string {
return n.Name
}
type INodeList []INode
func (l *INodeList) UnmarshalBSONValue(t bsontype.Type, data []byte) error {
fmt.Println("INodeList.UnmarshalBSONValue")
var arr []bson.Raw // 1. First, try to decode data as []bson.Raw
err := bson.Unmarshal(data, &arr) // error: cannot decode document into []bson.Raw
if err != nil {
fmt.Println(err)
return err
}
for _, item := range arr { // 2. Then, try to decode each bson.Raw as concrete Node according to `nodeclass`
class := item.Lookup("nodeclass").StringValue()
fmt.Printf("class: %v\n", class)
if class == "SubNode1" {
bin, err := bson.Marshal(item)
if err != nil {
return err
}
var sub1 SubNode1
err = bson.Unmarshal(bin, &sub1)
if err != nil {
return err
}
*l = append(*l, &sub1)
} else if class == "SubNode2" {
//...
}
}
return nil
}
type SubNode1 struct {
*Node `bson:"inline"`
FirstName string
LastName string
}
type SubNode2 struct {
*Node `bson:"inline"`
Extra string
}
With code above, I'm trying to decode INodeList data as []bson.Raw, then decode each bson.Raw as a concrete Node according to nodeclass. But it report error:
cannot decode document into []bson.Raw
at line
err := bson.Unmarshal(data, &arr).
So, how to do it correctly?
You need to pass a bson.Raw’s pointer to bson.Unmarshal(data, &arr) and then slice its value to an array of raw values like:
func (l *INodeList) UnmarshalBSONValue(t bsontype.Type, data []byte) error {
fmt.Println("INodeList.UnmarshalBSONValue")
var raw bson.Raw // 1. First, try to decode data as bson.Raw
err := bson.Unmarshal(data, &raw)
if err != nil {
fmt.Println(err)
return err
}
// Slice the raw document to an array of valid raw values
rawNodes, err := raw.Values()
if err != nil {
return err
}
// 2. Then, try to decode each bson.Raw as concrete Node according to `nodeclass`
for _, rawNode := range rawNodes {
// Convert the raw node to a raw document in order to access its "nodeclass" field
d, ok := rawNode.DocumentOK()
if !ok {
return fmt.Errorf("raw node can't be converted to doc")
}
class := d.Lookup("nodeclass").StringValue()
// Decode the node's raw doc to the corresponding struct
var node INode
switch class {
case "SubNode1":
node = &SubNode1{}
case "SubNode2":
node = &SubNode2{}
//...
default:
// ...
}
bson.Unmarshal(d, node)
*l = append(*l, node)
}
return nil
}
Note that Note.Children must be a INodeList's pointer, and the inline fields must be a struct or a map (not a pointer):
type Node struct {
TypedNode `bson:",inline"`
Name string
Children *INodeList
}
type SubNode1 struct {
Node `bson:",inline"`
FirstName string
LastName string
}
type SubNode2 struct {
Node `bson:",inline"`
Extra string
}

MutableSideEffect() panics when setting second value

We're in the process of writing a .NET Cadence client and are a bit confused with how MutableSideEffect() is supposed to work. We've been thinking of the ID being passed as essentially a variable name and that developers should be able to update mutable values in a workflow. When we try this though, the second MutableSideEffect() call fails with this panic:
panic: adding duplicate decision DecisionType: Marker, ID: MutableSideEffect_value-1, state=Created, isDone()=false, history=[Created]
We munged the greetings workflow sample to make these calls:
package main
import (
"fmt"
"math/rand"
"time"
"go.uber.org/cadence/activity"
"go.uber.org/cadence/workflow"
"go.uber.org/zap"
)
/**
* This greetings sample workflow executes 3 activities in sequential. It gets greeting and name from 2 different activities,
* and then pass greeting and name as input to a 3rd activity to generate final greetings.
*/
// ApplicationName is the task list for this sample
const ApplicationName = "greetingsGroup"
// This is registration process where you register all your workflows
// and activity function handlers.
func init() {
workflow.Register(SampleGreetingsWorkflow)
activity.Register(getGreetingActivity)
activity.Register(getNameActivity)
activity.Register(sayGreetingActivity)
}
// SampleGreetingsWorkflow Workflow Decider.
func SampleGreetingsWorkflow(ctx workflow.Context) error {
// Get Greeting.
ao := workflow.ActivityOptions{
ScheduleToStartTimeout: time.Minute,
StartToCloseTimeout: time.Minute,
HeartbeatTimeout: time.Second * 20,
}
ctx = workflow.WithActivityOptions(ctx, ao)
logger := workflow.GetLogger(ctx)
var greetResult string
err := workflow.ExecuteActivity(ctx, getGreetingActivity).Get(ctx, &greetResult)
if err != nil {
logger.Error("Get greeting failed.", zap.Error(err))
return err
}
f := func(ctx workflow.Context) interface{} {
return rand.Intn(100)
}
e := func(a, b interface{}) bool {
if a == b {
return true
}
return false
}
var result int
sideEffectValue := workflow.MutableSideEffect(ctx, "value-1", f, e)
err = sideEffectValue.Get(&result)
if err != nil {
panic(err)
}
logger.Debug("MutableSideEffect-1", zap.Int("Value", result))
//************** THIS CALL FAILS **************
sideEffectValue = workflow.MutableSideEffect(ctx, "value-1", f, e)
err = sideEffectValue.Get(&result)
if err != nil {
panic(err)
}
logger.Debug("MutableSideEffect-2", zap.Int("Value", result))
// Get Name.
var nameResult string
err = workflow.ExecuteActivity(ctx, getNameActivity).Get(ctx, &nameResult)
if err != nil {
logger.Error("Get name failed.", zap.Error(err))
return err
}
// Say Greeting.
var sayResult string
err = workflow.ExecuteActivity(ctx, sayGreetingActivity, greetResult, nameResult).Get(ctx, &sayResult)
if err != nil {
logger.Error("Marshalling failed with error.", zap.Error(err))
return err
}
logger.Info("Workflow completed.", zap.String("Result", sayResult))
return nil
}
// Get Name Activity.
func getNameActivity() (string, error) {
return "Cadence", nil
}
// Get Greeting Activity.
func getGreetingActivity() (string, error) {
return "Hello", nil
}
// Say Greeting Activity.
func sayGreetingActivity(greeting string, name string) (string, error) {
result := fmt.Sprintf("Greeting: %s %s!\n", greeting, name)
return result, nil
}
Are we thinking about this correctly?
This is a bug in the Go client library. It happens when a MutableSideEffect with the same id is used multiple times during a single decision.
If you force a separate decision by putting workflow.Sleep(ctx, time.Second) just before the second MutableSideEffect call the problem disappears.
I filed an issue to get this fixed.
Thanks a lot for reporting!

How to create elements of a struct with a Mutex field

I have a Get() function:
func Get(url string) *Response {
res, err := http.Get(url)
if err != nil {
return &Response{}
}
// res.Body != nil when err == nil
defer res.Body.Close()
body, err := ioutil.ReadAll(res.Body)
if err != nil {
log.Fatalf("ReadAll: %v", err)
}
reflect.TypeOf(body)
return &Response{sync.Mutex(),string(body), res.StatusCode}
}
as well as a Read() function:
func Read(url string, timeout time.Duration) (res *Response) {
done := make(chan bool)
go func() {
res = Get(url)
done <- true
}()
select { // As soon as either
case <-done: // done is sent on the channel or
case <-time.After(timeout): // timeout
res = &Response{"Gateway timeout\n", 504}
}
return
}
the Response type returned by the functions is defined as:
type Response struct {
Body string
StatusCode int
}
This read function makes use of the Get() function and also implements a timeout. The problem is that a data race can occur if the timeout occurs and the Get() response is written to res at the same time in Read().
I have a plan for how to solve this. It is to use Mutex. To do this, I would add a field to the Response struct:
type Response struct {
mu sync.Mutex
Body string
StatusCode int
}
so that the Response can be locked. However, I'm not sure how to fix this in the other parts of the code.
My attempt looks like this, for the Get():
func Get(url string) *Response {
res, err := http.Get(url)
if err != nil {
return &Response{}
}
// res.Body != nil when err == nil
defer res.Body.Close()
body, err := ioutil.ReadAll(res.Body)
if err != nil {
log.Fatalf("ReadAll: %v", err)
}
reflect.TypeOf(body)
return &Response{sync.Mutex(),string(body), res.StatusCode} // This line is changed.
}
and for the Read():
func Read(url string, timeout time.Duration) (res *Response) {
done := make(chan bool)
res = &Response{sync.Mutex()} // this line has been added
go func() {
res = Get(url)
done <- true
}()
select {
case <-done:
case <-time.After(timeout):
res.mu.Lock()
res = &Response{sync.Mutex(), "Gateway timeout\n", 504} // And mutex was added here.
}
defer res.mu.Unlock()
return
}
This "solution" generates these errors:
./client.go:54: missing argument to conversion to sync.Mutex: sync.Mutex()
./client.go:63: missing argument to conversion to sync.Mutex: sync.Mutex()
./client.go:63: too few values in struct initializer
./client.go:73: missing argument to conversion to sync.Mutex: sync.Mutex()
./client.go:95: cannot use "Service unavailable\n" (type string) as type sync.Mutex in field value
./client.go:95: cannot use 503 (type int) as type string in field value
./client.go:95: too few values in struct initializer
What is the correct way of using Mutex in this case?
While your answer with Volker's guidance is good, you might want to consider using a non default http.Client so that you can set a Timeout on the client making the request (then you don't have to worry about handling the timeouts yourself).
I followed Volker's suggestion and used a channel to solve the problem.
func Read(url string, timeout time.Duration) (res *Response) {
done := make(chan bool) // A channel
resChan := make(chan *Response)
go func() {
resChan <- Get(url)
done <- true
}()
select {
case <-done:
res = &Response{}
case <-time.After(timeout):
res = &Response{"Gateway timeout\n", 504}
}
return
}
Now, there can be no simultaneous writes to res. It's going to be either the timeout or the returned value of Get(url).

Golang - MongoDB (mgo) retrieve inserted file (BSON not GridFS)

I am a newbie in Golang.
I am trying to retrieve a PDF file object I have inserted. I am not using GridFS, as the files that I would be storing are under 16 MB.
The object has been inserted (using load_file function) and the object ID I am seeing with the MongoDB visual client is ObjectId("554f98a400afc2dd3cbfb21b").
Unfortunately the file created on disk is of 0 kb.
Please advise how to correctly retrieve the inserted PDF object.
Thank you
package main
import (
"fmt"
"io/ioutil"
"gopkg.in/mgo.v2"
)
type Raw struct {
Kind byte
Data []byte
}
type RawDocElem struct {
Name string
Value Raw
}
func check(err error) {
if err != nil {
panic(err.Error())
}
}
func read_file_content(AFileName string) []byte {
file_contents, err := ioutil.ReadFile(AFileName)
check(err)
return file_contents
}
func save_fetched_file(AFileName RawDocElem) {
ioutil.WriteFile("fisier.pdf", AFileName.Value.Data, 0644)
fmt.Println("FileName:", AFileName.Name)
}
func load_file(AFileName string, ADatabaseName string, ACollection string) {
session, err := mgo.Dial("127.0.0.1,127.0.0.1")
if err != nil {
panic(err)
}
defer session.Close()
session.SetMode(mgo.Monotonic, true)
c := session.DB(ADatabaseName).C(ACollection)
the_obj_to_insert := Raw{Kind: 0x00, Data: read_file_content(AFileName)}
err = c.Database.C(ACollection).Insert(&the_obj_to_insert)
check(err)
}
func get_file(ADatabaseName string, ACollection string, The_ID string) RawDocElem {
session, err := mgo.Dial("127.0.0.1,127.0.0.1")
if err != nil {
panic(err)
}
defer session.Close()
session.SetMode(mgo.Monotonic, true)
c := session.DB(ADatabaseName).C(ACollection)
result := RawDocElem{}
err = c.FindId(The_ID).One(&result)
return result
}
func main() {
//f_name := "Shortcuts.pdf"
db_name := "teste"
the_collection := "ColectiaDeFisiere"
//load_file(f_name, db_name, the_collection)
fmt.Sprintf(`ObjectIdHex("%x")`, string("554f98a400afc2dd3cbfb21b"))
save_fetched_file(get_file(db_name, the_collection, fmt.Sprintf(`ObjectIdHex("%x")`, string("554f98a400afc2dd3cbfb21b"))))
}
I think your way to build the object ID is wrong. Furthermore, you forgot to test the error following the FindId call.
I would try to import the bson package, and build the object ID using something like:
hexString := "554f98a400afc2dd3cbfb21b" //24 digits hexadecimal string
objid := bson.ObjectIdHex(hexString)
...
err = c.FindId(objid).One(&result)
if err == nil {
// Do something with result
...
} else {
// Error
...
}