I have a field in a MongoDB document that contains an array of numbers. I want to update this field with newly received numbers, but a number should only be added if it is not already present in the array. In MongoDB itself we can use $setUnion, but I am not sure how to do this with the C# driver. Can anyone suggest a solution?
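$setUnion is an aggregation operator; it is used to produce aggregation output, not to update a document.
For an update you need to use AddToSetEach from the C# driver (it renders as $addToSet with $each).
Please find below a full code snippet that also verifies the result after the update.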
/// <summary>
/// Inserts a document holding an initial number array, merges a second array into it with
/// <c>AddToSetEach</c>, reloads the document and prints "OK" when the stored array has exactly
/// as many entries as there are expected numbers found in it, otherwise "NOK".
/// </summary>
public static void Main()
{
    var mongo = new MongoClient("mongodb://localhost:27017");
    var kalaimani = mongo.GetDatabase("test").GetCollection<KalaimaniData>("kalaimani");

    // Arrays used by the demo: the initial content, the numbers to merge, and the expected union.
    var initialNumbers = new[] { 1, 4, 5, 6 };
    var incomingNumbers = new[] { 2, 3, 4, 5 };
    var expectedNumbers = new[] { 1, 4, 5, 6, 2, 3 };

    var inserted = new KalaimaniData { Numbers = initialNumbers };
    kalaimani.InsertOne(inserted);

    // Merge the incoming numbers into the document that was just inserted.
    var byId = Builders<KalaimaniData>.Filter.Eq(x => x.Id, inserted.Id);
    var mergeUpdate = Builders<KalaimaniData>.Update.AddToSetEach(x => x.Numbers, incomingNumbers);
    kalaimani.UpdateOne(byId, mergeUpdate);

    // Retrieve and compare.
    var reloaded = kalaimani.Find(byId).First();
    var matched = expectedNumbers.Count(expected => reloaded.Numbers.Contains(expected));
    Console.WriteLine(reloaded.Numbers.Length == matched ? "OK" : "NOK");
    Console.ReadLine();
}
/// <summary>Document type with an identifier and an array of numbers.</summary>
class KalaimaniData
{
/// <summary>Gets or sets the document id.</summary>
public ObjectId Id { get; set; }
/// <summary>Gets or sets the array of numbers held by the document.</summary>
public int[] Numbers { get; set; }
}
Related
Good morning,
I am trying to solve what I believe is a linear problem, using Microsofts SolverFoundation in code (c#). I see most references to this type of solving are related to the Solver inside of Excel, and it indeed does share many similarities. However, I have written up an example of what I am trying to do.
For this example, lets say I have 3 grocery stores (Albertsons, Safeway, Costco), and 3 different types of apples (Red, Green, Fuji).
/// <summary>The apple varieties that can be ordered.</summary>
public enum AppleType
{
Red,
Green,
Fuji,
}
Each of these stores offers these different apple types at different percentages per order.
/// <summary>A grocery store together with the apples it offers.</summary>
public class Store
{
/// <summary>Gets or sets the store name.</summary>
public string Name { get; set; }
/// <summary>Gets or sets the apples this store offers, one entry per apple type.</summary>
public List<Apple> ApplesOffered { get; set; }
}
/// <summary>One apple variety as offered by a store.</summary>
public class Apple
{
/// <summary>Gets or sets the apple variety.</summary>
public AppleType AppleType { get; set; }
/// <summary>Gets or sets the share of an order made up of this variety (presumably a percentage in the range 0-100 - confirm).</summary>
public double OrderPercent { get; set; }
}
Here is my mock-data for this setup.
// Mock data: three stores, each listing an OrderPercent for every apple type.
// An OrderPercent of 0 means the store does not supply that type (only Costco supplies Fuji).
List<Store> stores = new List<Store>
{
new Store()
{
Name = "Albertsons",
ApplesOffered = new List<Apple>
{
new Apple(){AppleType = AppleType.Red, OrderPercent = 80 },
new Apple(){AppleType = AppleType.Green, OrderPercent = 15 },
new Apple(){AppleType = AppleType.Fuji, OrderPercent = 0 }
}
},
new Store()
{
Name = "Safeway",
ApplesOffered = new List<Apple>
{
new Apple(){AppleType = AppleType.Red, OrderPercent = 12 },
new Apple(){AppleType = AppleType.Green, OrderPercent = 30 },
new Apple(){AppleType = AppleType.Fuji, OrderPercent = 0 }
}
},
new Store()
{
Name = "Costco",
ApplesOffered = new List<Apple>
{
new Apple(){AppleType = AppleType.Red, OrderPercent = 10 },
new Apple(){AppleType = AppleType.Green, OrderPercent = 35 },
new Apple(){AppleType = AppleType.Fuji, OrderPercent = 40 }
}
}
};
Alright, so say I have a list of apple types and I choose how many of each I want; the solver should then give me the minimal, optimized set of orders from the different stores that gets me what I want.
my code for building the constraints is as follows:
// Builds a linear model with one decision per store (the number of orders placed there) and one
// ">=" constraint per apple type (the requested quantity), solves it with the simplex solver and
// returns the resulting number of orders per store.
var context = SolverContext.GetContext();
context.ClearModel();
var model = context.CreateModel();

// The constraint expressions are parsed as text, so numbers must always use '.' as the decimal
// separator regardless of the machine's current culture.
var invariant = System.Globalization.CultureInfo.InvariantCulture;
var appleTypes = (AppleType[])Enum.GetValues(typeof(AppleType));

// Decisions: one non-negative real variable per store, named after the store.
foreach (var store in stores)
{
    model.AddDecisions(new Decision(Domain.RealNonnegative, store.Name));
}

// Constraints: collect one "<fraction> * <store>" term per (store, apple type) with a positive share.
var constraints = new List<CustomAppleConstraint>();
foreach (var store in stores)
{
    foreach (var a in appleTypes)
    {
        var ao = store.ApplesOffered.FirstOrDefault(_ => _.AppleType == a);
        if (ao != null && ao.OrderPercent > 0)
        {
            // Convert the percentage numerically. Writing the text "0." + percent is only correct for
            // two-digit whole numbers: 5 would become 0.5 (50%) and 100 would become 0.100 (10%).
            var fraction = (ao.OrderPercent / 100d).ToString("0.################", invariant);
            constraints.Add(new CustomAppleConstraint
            {
                Type = a,
                Value = $"{fraction} * {store.Name}"
            });
        }
    }
}

// Add Constraints to model: sum the terms of each apple type and require at least the ordered amount.
var termsByType = constraints.ToLookup(c => c.Type, c => c.Value);
foreach (var a in appleTypes)
{
    if (termsByType.Contains(a))
    {
        var requested = Convert.ToString(order[a], invariant);
        model.AddConstraint($"_{a}", $"{string.Join(" + ", termsByType[a])} >= {requested}");
    }
}

// Solve
var solution = context.Solve(new SimplexDirective());
var solutionResults = new List<KeyValuePair<string, double>>();
foreach (var decision in solution.Decisions)
{
    var value = (double)decision.GetValues().First()[0];
    solutionResults.Add(new KeyValuePair<string, double>(decision.Name, value));
}
return solutionResults;
I have tested this with a bunch of different orders, and it all appears to be giving me the correct data.
Now say I have the following simple order where I only want green apples:
{AppleType.Red, 0},
{AppleType.Green, 10},
{AppleType.Fuji, 0}
I get back a result suggesting 28.57 orders from Costco, which I would expect because Costco offers the highest percentage of green apples per order.
So here is where I am trying to figure the correct way to implement one more constraint.
Say I have some preferences for which stores I use to get certain apple types. (eg, I only want my green apples from Safeway).
// Maps each apple type to the store it should preferably be ordered from.
var orderPrefs = new Dictionary<AppleType, string>
{
    [AppleType.Red] = "Albertsons",
    [AppleType.Green] = "Safeway",
    [AppleType.Fuji] = "Costco",
};
So even though Costco provides the highest percentage of green apples, I want to add some constraints from my preferences that prioritize the green apple result from Safeway. In this instance, my prefs say I Only want green apple from Safeway, so 100% of this order should come from Safeway.
Granted this is a very simple example, but its the meat of what I am trying to wrap my head around.
I hope this makes sense... I have been trying to figure this out for a few days now with no luck. Thank you.
My application is ASP.NET MVC 5 / SQL Server.
I am trying to select specific columns from a list based on an array:
First list has 200 columns: Age, Gender, .....
var list1 = _reportRepository.ShowMasteView().ToList();
Second list has 20 columns: Age, Gender, ......
From the view I select the items to be displayed:
string[] lits2 = showColumn.Where(c => c.Value == true).Select(c=> c.Key).ToArray();
I get
To get these two specific columns, I tried
var nList = list1.Select(t2 => lits2.Any(t1 => t2.Contains(t1)));
I get an error
Can not resolve symbol "Contains"
I was able to do it using the following
// Comma-separated column list that is spliced directly into the SELECT statement below.
var keys = "Age,Gender";
var connection =
ConfigurationManager.ConnectionStrings["DALEntities"].ConnectionString;
// NOTE(review): the column list is concatenated into the SQL text. Column names cannot be sent as
// SQL parameters, so if `keys` is ever built from user input it must first be validated against a
// whitelist of known column names.
using (var dataAdapter = new SqlDataAdapter("SELECT " + keys
+ " from dbo.vw_MasterView", connection))
{
var dataTable = new DataTable();
// Load the selected columns, then apply the mapped source schema to the same table.
dataAdapter.Fill(dataTable);
dataAdapter.FillSchema(dataTable, SchemaType.Mapped);
return dataTable;
}
Is there a better way in linq?
From my understanding, you are trying to select a dynamic object that contains only the desired properties/columns.
This can be achieved by building a dynamic expression/function to apply to the Select
The following builds an expression based on the model type and the provided properties
/// <summary>
/// Projects the elements of a query onto dynamic objects that expose only a chosen subset of the
/// model's public properties.
/// </summary>
static class DynamicExtensions {
    /// <summary>
    /// Projects each element of <paramref name="query"/> to an <see cref="ExpandoObject"/> holding
    /// the public readable properties whose names are listed in <paramref name="propertyNames"/>.
    /// </summary>
    /// <typeparam name="TModel">The element type of the query.</typeparam>
    /// <param name="query">The source query.</param>
    /// <param name="propertyNames">Names of the properties to keep; names that do not match a property are ignored.</param>
    /// <returns>A query of dynamic objects, each castable to <c>IDictionary&lt;string, object&gt;</c>.</returns>
    /// <exception cref="ArgumentNullException">An argument is <c>null</c>.</exception>
    public static IQueryable<dynamic> SelectDynamic<TModel>(this IQueryable<TModel> query, ISet<string> propertyNames) {
        if (query == null) {
            throw new ArgumentNullException(nameof(query));
        }
        if (propertyNames == null) {
            throw new ArgumentNullException(nameof(propertyNames));
        }
        var selector = query.BuildSelectorFor(propertyNames);
        return query.Select(selector);
    }

    /// <summary>
    /// Builds the expression tree for
    /// <c>(TModel v) => new ExpandoObject() { { "PropertyName", v.PropertyName }, ... }</c>.
    /// </summary>
    static Expression<Func<TModel, dynamic>> BuildSelectorFor<TModel>(this IQueryable<TModel> query, ISet<string> propertyNames) {
        var modelType = typeof(TModel);
        // Indexers (e.g. string.Chars) and write-only properties cannot be read as a plain member
        // access, so they are skipped instead of failing while the expression is built.
        var properties = modelType.GetProperties()
            .Where(p => p.CanRead && p.GetIndexParameters().Length == 0 && propertyNames.Contains(p.Name))
            .ToArray();
        // (TModel v) =>
        var parameter = Expression.Parameter(modelType, "v");
        var addMethod = typeof(IDictionary<string, object>).GetMethod(
            "Add", new Type[] { typeof(string), typeof(object) });
        // { { "PropertyName", v.PropertyName }, ... }
        // The PropertyInfo is used directly: looking the member up again by name is case-insensitive
        // and becomes ambiguous when two properties differ only by case.
        var elementInits = properties.Select(p =>
            Expression.ElementInit(
                addMethod,
                Expression.Constant(p.Name),
                Expression.Convert(Expression.Property(parameter, p), typeof(object))));
        // new ExpandoObject()
        var newExpando = Expression.New(typeof(ExpandoObject));
        // new ExpandoObject() { { "PropertyName", v.PropertyName }, ... }
        var expando = Expression.ListInit(newExpando, elementInits);
        // (TModel v) => new ExpandoObject() { { "PropertyName", v.PropertyName }, ... }
        return Expression.Lambda<Func<TModel, dynamic>>(expando, parameter);
    }
}
This takes advantage of ExpandoObject whose members can be dynamically added and removed at run time.
The following test was used as an example of how the above function is invoked.
/// <summary>
/// Projects a small list of people onto the "Age" and "Gender" columns and writes one line per
/// projected row to the debug output.
/// </summary>
[TestMethod]
public void DynamicList() {
    var people = new List<Person>
    {
        new Person { Gender = "Male", Age = 10, FirstName = "Nama1", SampleNumber = 12 },
        new Person { Gender = "Male", Age = 12, FirstName = "Nama2", SampleNumber = 13 },
        new Person { Gender = "Female", Age = 13, FirstName = "Nama3", SampleNumber = 14 },
        new Person { Gender = "Male", Age = 14, FirstName = "Nama4", SampleNumber = 15 },
    };
    var keys = new[] { "Age", "Gender" };

    var projected = people.AsQueryable().SelectDynamic(new HashSet<string>(keys));

    // Each projected row can be read through its dictionary interface.
    foreach (IDictionary<string, object> row in projected) {
        Debug.WriteLine($"{{ {keys[0]} = {row[keys[0]]}, {keys[1]} = {row[keys[1]]} }}");
    }
}
and produces the following output
{ Age = 10, Gender = Male }
{ Age = 12, Gender = Male }
{ Age = 13, Gender = Female }
{ Age = 14, Gender = Male }
The dynamic objects can be used in the View and it is a simple matter of calling the desired members.
For example suppose you have a model as follows
/// <summary>View model carrying a plain property, the column keys and the dynamically projected rows.</summary>
public class MyViewModel {
/// <summary>Gets or sets an ordinary, statically typed property.</summary>
public string MyProperty { get; set; }
/// <summary>Gets or sets the names of the columns to display.</summary>
public string[] Keys { get; set; }
/// <summary>Gets or sets the dynamically projected rows.</summary>
public List<dynamic> MyDynamicProperty { get; set; }
}
that was populated with data and given to the view
// Load the master view, project it onto the selected columns and hand the result to the view.
var list1 = _reportRepository.ShowMasteView();
var keys = new string[] { "Age", "Gender", };
var nList = list1.AsQueryable().SelectDynamic(new HashSet<string>(keys));
var viewModel = new MyViewModel {
MyProperty = "Hello World",
// ToList() runs the projection here, so the view receives a materialized list.
MyDynamicProperty = nList.ToList(),
Keys = keys
};
return View(viewModel);
Then in the view you can use the model as desired, casting to get access to members in the expando object.
@model MyViewModel
...
@* Heading from the static property, then a table with one column per key and one row per dynamic row. *@
<h2>@Model.MyProperty</h2>
<table>
    <tr>
        @foreach (string key in Model.Keys) {
            <th>@key</th>
        }
    </tr>
    @foreach (IDictionary<string, object> row in Model.MyDynamicProperty) {
        <tr>
            @foreach (string key in Model.Keys) {
                <td>@row[key]</td>
            }
        </tr>
    }
</table>
I think you just need to use Contains on your list2.
// Keep only the list1 entries that are also present in lits2. The lambda must use its own
// parameter (t); the original referenced an undeclared name.
var nList = list1.Where(t => lits2.Contains(t));
Contains is a method for Lists. The code you had was trying to use it on a string.
If you have two list of a person's class
/// <summary>Sample person used to demonstrate filtering one list by another.</summary>
public class Person
{
/// <summary>Gets or sets the identifier used to match people across lists.</summary>
public int id { get; set; }
/// <summary>Gets or sets the person's name.</summary>
public string name { get; set; }
}
If the lists are as below:
// Full list of people.
var list1 = new List<Person>
{
new Person{ id = 1, name = "Nama1"},
new Person{ id = 2, name = "Nama2"},
new Person{ id = 3, name = "Nama3"},
new Person{ id = 4, name = "Nama4"},
};
// Subset whose ids are used as the filter.
var list2 = new List<Person>
{
new Person{ id = 1, name = "Nama1"},
new Person{ id = 2, name = "Nama2"},
};
You can filter in the following ways
// Ids present in list2; every filter below keeps the list1 entries whose id is in this list.
var keys = list2.Select(person => person.id).ToList();

// 1) Whole Person objects.
var filter1 = list1
    .Where(person => keys.Contains(person.id))
    .ToList();

// 2) Only the name of each match, wrapped in an anonymous type.
var filter2 = list1
    .Where(person => keys.Contains(person.id))
    .Select(person => new { person.name })
    .ToList();

// 3) Project first (including a membership flag), then filter on the flag.
var filter3 = list1
    .Select(person => new { person.id, person.name, check = keys.Contains(person.id) })
    .Where(projected => projected.check)
    .ToList();
If you have array of string
you can use below code
array string same
var lis1 = new string[] {"name1", "name2","name3" };
var lis2 = new string[] { "name1" };
You can filter array of string in the following ways
// Strings of lis1 that also occur in lis2.
var items1 = lis1.Where(name => lis2.Contains(name)).ToList();
// Same matches, but each one carried as { x, check } with the membership flag included.
var items = lis1
    .Select(x => new { x, check = lis2.Contains(x) })
    .Where(entry => entry.check)
    .ToList();
As my application currently sits, each AppUser may (or may not) have 3 phone numbers (UserPhones). One of each type (Mobile, Home, Other).
The following Tag Helper works great (thanks @itminus).
Calling code from Razor Page:
@* Edit the PhoneNumber property of the Mobile and Other phones. *@
<user-phones phones="@Model.UserPhones"
             asp-for="@Model.UserPhones"
             prop-name-to-edit="PhoneNumber"
             types-to-edit="new EnumPhoneType[] { EnumPhoneType.Mobile,
                                                   EnumPhoneType.Other }" />
Code:
/// <summary>
/// Renders the fields of every phone in <see cref="Phones"/>. The property named by
/// <see cref="PropNameToEdit"/> is rendered as a labelled text box when the phone's type is listed
/// in <see cref="TypesToEdit"/>; every other property is rendered as a hidden input so the whole
/// collection is posted back.
/// </summary>
public class UserPhonesTagHelper : TagHelper
{
    private const string ForAttributeName = "asp-for";

    private readonly IHtmlGenerator _htmlGenerator;

    /// <summary>Creates the tag helper.</summary>
    /// <param name="htmlGenerator">Generator used to build the label, text box and hidden inputs.</param>
    public UserPhonesTagHelper(IHtmlGenerator htmlGenerator)
    {
        _htmlGenerator = htmlGenerator;
    }

    /// <summary>Gets or sets a hook that can rewrite each generated field expression; identity by default.</summary>
    [HtmlAttributeName("expression-filter")]
    public Func<string, string> ExpressionFilter { get; set; } = e => e;

    /// <summary>Gets or sets the phones to render.</summary>
    public List<UserPhones> Phones { get; set; }

    /// <summary>Gets or sets the phone types whose <see cref="PropNameToEdit"/> property is editable.</summary>
    public EnumPhoneType[] TypesToEdit { get; set; }

    /// <summary>Gets or sets the name of the property that is rendered as a text box.</summary>
    public string PropNameToEdit { get; set; }

    /// <summary>Gets or sets the current view context.</summary>
    [ViewContext]
    public ViewContext ViewContext { set; get; }

    /// <summary>Gets or sets the model expression of the phone collection; its name prefixes every field name.</summary>
    [HtmlAttributeName(ForAttributeName)]
    public ModelExpression For { get; set; }

    /// <summary>Writes the inputs for all phones to <paramref name="output"/> without a wrapping element.</summary>
    /// <param name="context">The tag helper context (not used).</param>
    /// <param name="output">Receives the generated HTML.</param>
    /// <returns>A completed task; all work is done synchronously.</returns>
    public override Task ProcessAsync(TagHelperContext context, TagHelperOutput output)
    {
        output.TagName = null; // do not want an outer HTML element

        if (Phones == null || Phones.Count == 0)
        {
            return Task.CompletedTask;
        }

        // Reflection results are the same for every phone, so look them up once.
        var props = typeof(UserPhones).GetProperties();
        var typeProperty = props.Single(z => z.Name == "Type");
        var propNameToEdit = PropNameToEdit.NormalizeString();

        for (int i = 0; i < Phones.Count; i++)
        {
            var phone = Phones[i];
            var eType = (EnumPhoneType)Enum.Parse(typeof(EnumPhoneType), typeProperty.GetValue(phone).ToString());
            var isEditableType = TypesToEdit != null && TypesToEdit.Contains(eType);
            var labelText = GetLabelText(eType);

            // Loop all properties
            foreach (var pi in props)
            {
                var v = pi.GetValue(phone);
                // Property values may be null (for example an unloaded navigation property).
                var text = v?.ToString();
                var expression = this.ExpressionFilter(For.Name + $"[{i}].{pi.Name}");
                var explorer = For.ModelExplorer.GetExplorerForExpression(typeof(IList<UserPhones>), o => v);

                // If requested type and property specified
                if (pi.Name.NormalizeString() == propNameToEdit && isEditableType)
                {
                    TagBuilder gridItem = new TagBuilder("div");
                    gridItem.Attributes.Add("class", "rvt-grid__item");
                    gridItem.InnerHtml.AppendHtml(BuildLabel(explorer, expression, labelText));
                    gridItem.InnerHtml.AppendHtml(BuildTextBox(explorer, expression, text));
                    output.Content.AppendHtml(gridItem);
                }
                else
                {
                    // Add hidden field so the property is bound properly on post-back.
                    output.Content.AppendHtml(BuildHidden(explorer, expression, text));
                }
            }
        }

        return Task.CompletedTask;
    }

    /// <summary>Returns the label text for a phone type, or <c>null</c> for an unknown type.</summary>
    private static string GetLabelText(EnumPhoneType eType)
    {
        switch (eType)
        {
            case EnumPhoneType.Home:
                return "Home Phone";
            case EnumPhoneType.Mobile:
                return "Mobile Phone";
            case EnumPhoneType.Other:
                return "Other Phone";
            default:
                return null;
        }
    }

    /// <summary>Builds a text box with the "form-control" CSS class for the given expression and value.</summary>
    private TagBuilder BuildTextBox(ModelExplorer explorer, string expression, string v)
    {
        // "class" is a C# keyword, so the anonymous-type member must be written as @class.
        return _htmlGenerator.GenerateTextBox(ViewContext, explorer, expression, v, null, new { @class = "form-control" });
    }

    /// <summary>Builds a hidden input for the given expression and value.</summary>
    public TagBuilder BuildHidden(ModelExplorer explorer, string expression, string v)
    {
        return _htmlGenerator.GenerateHidden(ViewContext, explorer, expression, v, false, new { });
    }

    /// <summary>Builds a label with text <paramref name="v"/> for the given expression.</summary>
    public TagBuilder BuildLabel(ModelExplorer explorer, string expression, string v)
    {
        return _htmlGenerator.GenerateLabel(ViewContext, explorer, expression, v, new { });
    }
}
My Question:
Lets assume this AppUser only has one related Mobile phone number listed currently. So AppUser.UserPhones (count = 1 of type Mobile). So the code above, as-is, will only render an input for Mobile phone.
Since types-to-edit calls for both Mobile and Other, I want both inputs to be rendered to the screen. And IF the user adds a phone number to the Other input, then it would be saved to the related UserPhones entity on the Razor Pages OnPostAsync method. If the user does NOT provide a number for the "Other" input, then the related UserPhones record of type "Other" should NOT be created.
Can you help?
Thanks again!!!!
TagHelper
As my application currently sits, each AppUser may (or may not) have 3 phone numbers (UserPhones). One of each type (Mobile, Home, Other).
If I understand correctly, an AppUser might have 3 phone numbers and the count of each phone type for every user will be zero or one.
If that's the case, we can simply use PhoneType as an index, in other words, there's no need to use a custom index to iterate through the Phones property, and the ProcessAsync() method could be :
/// <summary>
/// Renders the fields of one phone per type in <c>TypesToEdit</c> (blank fields when the user has
/// no phone of that type), followed by the fields of the existing phones whose type is not
/// editable. The phone type's numeric value is used as the collection index of each field.
/// </summary>
/// <param name="context">The tag helper context (not used).</param>
/// <param name="output">Receives the generated HTML.</param>
/// <returns>A completed task; all work is done synchronously.</returns>
public override Task ProcessAsync(TagHelperContext context, TagHelperOutput output)
{
    output.TagName = null; // do not want an outer HTML element
    var props = typeof(UserPhones).GetProperties();
    // A user without any phone yet may come with a null collection; treat it as empty.
    var existingPhones = Phones ?? new List<UserPhones>();

    // display editable tags for phones
    foreach (var pt in this.TypesToEdit)
    {
        var phone = existingPhones.SingleOrDefault(p => p.Type == pt);
        var index = (int)pt;
        foreach (var pi in props)
        {
            // if phone == null, then the property value is null too
            var pv = phone == null ? null : pi.GetValue(phone);
            var tag = GenerateFieldForProperty(pi.Name, pv, index, pt);
            output.Content.AppendHtml(tag);
        }
    }

    // generate hidden input tags for phones that are not editable
    var readOnlyPhones = existingPhones.Where(p => !this.TypesToEdit.Contains(p.Type));
    foreach (var p in readOnlyPhones)
    {
        var index = (int)p.Type;
        foreach (var pi in props)
        {
            var pv = pi.GetValue(p);
            var tag = GenerateFieldForProperty(pi.Name, pv, index, p.Type);
            output.Content.AppendHtml(tag);
        }
    }

    // Nothing above is asynchronous, so return a completed task instead of declaring the method async.
    return Task.CompletedTask;
}
Here the GenerateFieldForProperty is a simply helper method to generate tag builder for particular property:
/// <summary>
/// Builds the field for one property of one phone: a labelled text box when the property is the
/// one named by <c>PropNameToEdit</c> and the phone type is editable, otherwise a hidden input.
/// </summary>
/// <param name="propName">Name of the property being rendered.</param>
/// <param name="propValue">Current value of the property; may be <c>null</c>.</param>
/// <param name="index">Collection index used in the generated field name.</param>
/// <param name="eType">Type of the phone the property belongs to.</param>
/// <returns>The tag builder for the generated field.</returns>
private TagBuilder GenerateFieldForProperty(string propName, object propValue, int index, EnumPhoneType eType)
{
    // whether current UserPhone is editable (check the PhoneType)
    var editable = TypesToEdit.Contains(eType);
    var expression = this.ExpressionFilter(For.Name + $"[{index}].{propName}");
    var explorer = For.ModelExplorer.GetExplorerForExpression(typeof(IList<UserPhones>), o => propValue);
    // If requested type and property specified. The property is identified by the propName
    // parameter; there is no PropertyInfo in scope inside this helper.
    if (propName.NormalizeString() == PropNameToEdit.NormalizeString() && editable)
    {
        TagBuilder gridItem = new TagBuilder("div");
        gridItem.Attributes.Add("class", "rvt-grid__item");
        var labelText = this.GetLabelTextByPhoneType(eType);
        gridItem.InnerHtml.AppendHtml(BuildLabel(explorer, expression, labelText));
        gridItem.InnerHtml.AppendHtml(BuildTextBox(explorer, expression, propValue?.ToString()));
        return gridItem;
    }
    else
    {
        // Add hidden field so the property is bound properly on post-back.
        return BuildHidden(explorer, expression, propValue?.ToString());
    }
}
/// <summary>Maps a phone type to its label text; returns <c>null</c> for any other value.</summary>
private string GetLabelTextByPhoneType(EnumPhoneType eType) {
    switch (eType)
    {
        case EnumPhoneType.Home:
            return "Home Phone";
        case EnumPhoneType.Mobile:
            return "Mobile Phone";
        case EnumPhoneType.Other:
            return "Other Phone";
        default:
            return null;
    }
}
When posted to server, if someone doesn't input a phone number for the other PhoneType, the actual payload will be something like:
AppUser.UserPhones[0].UserPhoneId=....&AppUser.UserPhones[0].PhoneNumber=911&....
&AppUser.UserPhones[2].UserPhoneId=&AppUser.UserPhones[2].PhoneNumber=&AppUser.UserPhones[2].Type=&AppUser.UserPhones[2].AppUserId=&AppUser.UserPhones[2].AppUser=
&AppUser.UserPhones[1].UserPhoneId=...&AppUser.UserPhones[1].PhoneNumber=119&....
Since we use the phone type as the index, we can conclude that UserPhones[0] will be used as a Mobile phone and UserPhones[2] will be treated as a Home phone.
page handler or action method
The model binder on the server side will create a UserPhone with empty values for each input that was left blank.
To remove those empty inputs and to prevent an overposting attack, we can use LINQ to filter UserPhones, so that UserPhone records are created or updated without the empty phones:
// Phone types the form is allowed to edit.
var editables = new[] { EnumPhoneType.Mobile, EnumPhoneType.Other };

// Keep only phones that have a number (drops the blank inputs) and an editable type
// (drops anything that was posted for a type the form must not change).
AppUser.UserPhones = AppUser.UserPhones
    .Where(phone => !string.IsNullOrEmpty(phone.PhoneNumber) && editables.Contains(phone.Type))
    .ToList();
// now the `UserPhones` will be clean for later use
// ... create or update user phones as you like
Let's say you want to create phones :
/// <summary>
/// Creates the posted phones: keeps the phones that have a number and an editable type, links
/// each of them to the current user and saves them.
/// </summary>
/// <returns>The current page.</returns>
public IActionResult OnPostCreate() {
    var editables = new[] { EnumPhoneType.Mobile, EnumPhoneType.Other };

    var phonesToCreate = AppUser.UserPhones
        .Where(phone => !string.IsNullOrEmpty(phone.PhoneNumber) && editables.Contains(phone.Type))
        .ToList();

    // construct relationship for inputs
    foreach (var phone in phonesToCreate) {
        phone.AppUser = AppUser;
        phone.AppUserId = AppUser.Id;
    }
    AppUser.UserPhones = phonesToCreate;

    this._dbContext.Set<UserPhones>().AddRange(AppUser.UserPhones);
    this._dbContext.SaveChanges();
    return Page();
}
Test Case :
<form method="post">
    <div class="row">
        @* Edit the PhoneNumber property of the Mobile and Other phones. *@
        <user-phones
            phones="@Model.AppUser.UserPhones"
            asp-for="@Model.AppUser.UserPhones"
            prop-name-to-edit="PhoneNumber"
            types-to-edit="new EnumPhoneType[] { EnumPhoneType.Mobile, EnumPhoneType.Other}"
        >
        </user-phones>
    </div>
    <button type="submit">submit</button>
</form>
User1 who has Mobile phone and Home phone number:
User2 who wants to create a new Mobile phone number :
I've some code to update an array (add to set). The code is currently using the legacy builder:
// Legacy builder: build an add-to-set-each update for the field and print it as JSON.
var data = new[] { 10, 20, 30 };
var fieldName = "data";
var oldWay = Update.AddToSetEach(fieldName , new BsonArray(data));
Console.WriteLine($"Old way:\n {oldWay.ToJson()} \n\n");
This works perfectly and prints:
Old way:
{ "$addToSet" : { "data" : { "$each" : [10, 20, 30] } } }
But when trying to use the new Builders class, I can't get it to work correctly. I'm using MongoDB.Driver 2.4.4. My code:
// New builder: the same update built through Builders<BsonDocument>.Update.
var data = new[] { 10, 20, 30 };
var fieldName = "data";
// Here ToJson() is called directly on the update definition object.
var newWay = Builders<BsonDocument>.Update.AddToSetEach(fieldName, new BsonArray(data)).ToJson();
Console.WriteLine($"New way:\n {newWay} \n\n");
The output is:
New way:
{ "_t" : "AddToSetUpdateDefinition`2" }
Any thoughts?
Thank you!
I don't think .ToJson() renders the string the way you want when it is called on an UpdateDefinition; it serializes the definition object itself instead of the update it describes. This used to be a little easier in the old driver.
Someone wrote an extension method though:
/// <summary>Helpers for inspecting MongoDB update definitions.</summary>
public static class MongoExtensions
{
    /// <summary>
    /// Renders an update definition to the <see cref="BsonDocument"/> it represents, using the
    /// globally registered serializer for <typeparamref name="T"/>.
    /// </summary>
    /// <typeparam name="T">The document type the update applies to.</typeparam>
    /// <param name="filter">The update definition to render.</param>
    /// <returns>The rendered update document.</returns>
    public static BsonDocument RenderToBsonDocument<T>(this UpdateDefinition<T> filter) =>
        filter.Render(
            BsonSerializer.SerializerRegistry.GetSerializer<T>(),
            BsonSerializer.SerializerRegistry);
}
Then you can
// Build the update, render it to a BsonDocument first, and only then convert it to JSON.
var data = new[] { 10, 20, 30 };
var fieldName = "data";
var newWay = Builders<BsonDocument>.Update.AddToSetEach(fieldName, new BsonArray(data));
var newWayJson = newWay.RenderToBsonDocument().ToJson();
Console.WriteLine($"New way:\n {newWayJson} \n\n");
I have managed to create documents and do some complex searching too, but I am facing a problem in grouping the search results.
The books found by a search are displayed, which works fine. Along with this, I need a grouping by author with a count per author, based on the same search query.
Example,
Author Name | Count
A | 12
B | 2
I am using Lucene.Net 3.0.3.0 which does not support grouping but there might be some work around. I need same feature with price ranges too.
Everything is possible if you write a custom Collector. What you describe are facets, and can easily be solved by counting the document values yourself. The core part is calling the IndexSearcher.Search overload accepting a collector. The collector should read values, usually implemented with a field-cache implementation and do the calculation needed.
This is a short demonstration using some classes from my demo-project Corelicious.Lucene.
// Number of matching documents per PostTypeId value.
var postTypes = new Dictionary<Int32, Int32>();
searcher.Search(query, new DelegatingCollector((reader, doc, scorer) => {
    // Only documents with a positive score are counted.
    if (!(scorer.Score() > 0)) {
        return;
    }
    var postType = SingleFieldCache.Default.GetInt32(reader, "PostTypeId", doc);
    if (!postType.HasValue) {
        return;
    }
    // A missing key leaves the count at 0, so the first hit stores 1.
    int seen;
    postTypes.TryGetValue(postType.Value, out seen);
    postTypes[postType.Value] = seen + 1;
}));
Full code:
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
using System.Xml;
using Corelicious.Lucene;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.QueryParsers;
using Lucene.Net.Search;
using Lucene.Net.Store;
using Directory = Lucene.Net.Store.Directory;
using Version = Lucene.Net.Util.Version;
namespace ConsoleApplication {
    /// <summary>
    /// Demo: indexes a Stack Exchange posts dump in memory, runs a query against the post bodies
    /// and prints how many matching posts there are per post type.
    /// </summary>
    public static class Program {
        // Location of the posts dump used when no path is given on the command line.
        private const string DefaultPostsPath = "/Users/sisve/Downloads/Stack Exchange Data Dump - Sept 2011/Content/092011 Mathematics/posts.xml";

        /// <summary>Builds the index, runs the query and prints the per-type summary.</summary>
        /// <param name="args">Optional: the first argument overrides the path of the posts XML file.</param>
        public static void Main(string[] args) {
            var postsPath = args != null && args.Length > 0 ? args[0] : DefaultPostsPath;

            Console.WriteLine ("Creating directory...");
            // Directory, analyzer, reader and searcher all hold resources, so dispose them when done.
            using (var directory = new RAMDirectory())
            using (var analyzer = new StandardAnalyzer(Version.LUCENE_30)) {
                CreateIndex(directory, analyzer, postsPath);

                var userQuery = "calculate pi";
                var queryParser = new QueryParser(Version.LUCENE_30, "Body", analyzer);
                var query = queryParser.Parse(userQuery);
                Console.WriteLine("Query: '{0}'", query);

                var postTypes = new Dictionary<Int32, Int32>();
                using (var indexReader = IndexReader.Open(directory, readOnly: true))
                using (var searcher = new IndexSearcher(indexReader)) {
                    searcher.Search(query, new DelegatingCollector((reader, doc, scorer) => {
                        var score = scorer.Score();
                        if (score > 0) {
                            var postType = SingleFieldCache.Default.GetInt32(reader, "PostTypeId", doc);
                            if (postType.HasValue) {
                                // A missing key leaves the count at 0, so the first hit stores 1.
                                int seen;
                                postTypes.TryGetValue(postType.Value, out seen);
                                postTypes[postType.Value] = seen + 1;
                            }
                        }
                    }));
                }

                Console.WriteLine("Post type summary");
                Console.WriteLine("Post type | Count");
                foreach (var pair in postTypes.OrderByDescending(x => x.Value)) {
                    var postType = (PostType)pair.Key;
                    Console.WriteLine("{0,-10} | {1}", postType, pair.Value);
                }
            }

            Console.ReadLine ();
        }

        /// <summary>Post types, with the numeric values used by the PostTypeId field.</summary>
        public enum PostType {
            Question = 1,
            Answer = 2,
            Tag = 4
        }

        /// <summary>
        /// Reads every <c>row</c> element of the posts XML file and adds it to a new index with the
        /// fields Tags (one per tag), Title, Body and PostTypeId.
        /// </summary>
        /// <param name="directory">Directory the index is written to; an existing index is overwritten.</param>
        /// <param name="analyzer">Analyzer used for the analyzed fields.</param>
        /// <param name="postsPath">Path of the posts XML file.</param>
        public static void CreateIndex(Directory directory, Analyzer analyzer, string postsPath = DefaultPostsPath) {
            using (var writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED))
            using (var xmlStream = File.OpenRead(postsPath))
            using (var xmlReader = XmlReader.Create(xmlStream)) {
                while (xmlReader.ReadToFollowing("row")) {
                    // A missing attribute yields null, and a Field cannot be created from a null
                    // value, so text attributes fall back to an empty string.
                    var tags = xmlReader.GetAttribute("Tags") ?? String.Empty;
                    var title = xmlReader.GetAttribute("Title") ?? String.Empty;
                    var body = xmlReader.GetAttribute("Body") ?? String.Empty;
                    var postTypeId = xmlReader.GetAttribute("PostTypeId");

                    var doc = new Document();
                    // tags are stored as <tag1><tag2>
                    foreach (Match match in Regex.Matches(tags, "<(.*?)>")) {
                        doc.Add(new Field("Tags", match.Groups[1].Value, Field.Store.NO, Field.Index.NOT_ANALYZED));
                    }
                    doc.Add(new Field("Title", title, Field.Store.NO, Field.Index.ANALYZED));
                    doc.Add(new Field("Body", body, Field.Store.NO, Field.Index.ANALYZED));
                    // Rows without a post type are indexed without the field; they are then simply
                    // not counted in the summary.
                    if (postTypeId != null) {
                        doc.Add(new Field("PostTypeId", postTypeId, Field.Store.NO, Field.Index.NOT_ANALYZED));
                    }
                    writer.AddDocument(doc);
                }
                writer.Optimize();
                writer.Commit();
            }
        }
    }
}