How to loop a search over a long string? - ms-word

In order to get around the 255 character search limitation in the desktop Word api I'm breaking long strings into searchable chunks of 254 characters and pushing them into an object "oSearchTerms". Then I'm attempting to iterate over oSearchTerms, search for the text, highlight it, then search for the next chunk and do the same until all items in oSearchTerms have been highlighted. The problem is it's not looping. It goes through the first iteration successfully but stops.
I've tried copious context.sync() calls, return true, return context.sync(), etc, which you'll see commented out below, to no avail.
I should also point out that it's not showing any errors. The loop just isn't looping.
Do I have to convert this over to an async function? I'd like to stick with ES5 and not use fat arrow functions.
What am I missing?
var fullSearchTerm = "As discussed earlier, one of the primary objectives of these DYH rules is to ensure that operators have at least one source of XYZ-approved data and documents that they can use to comply with operational requirements The objective would be defeated if the required data and documents were not, in fact, approved and Only by retaining authority to approve these materials can we ensure that they comply with applicable requirements and can be relied upon by operators to comply with operational rules which We believe there are differences between EXSS ICA and other ICA that necessitate approval of EVIS ICA."
function findTextMatch() {
Word.run(function(context) {
OfficeExtension.config.extendedErrorLogging = true;
var oSearchTerms = [];
var maxChars = 254;
var lenFullSearchTerm = fullSearchTerm.length;
var nSearchCycles = Math.ceil(Number((lenFullSearchTerm / maxChars)));
console.log("lenFullSearchTerm: " + lenFullSearchTerm + " nSearchCycles: " + nSearchCycles);
// create oSearchTerms object containing search terms
// leaves short strings alone but breaks long strings into
// searchable 254 character chunks
for (var i = 0; i < nSearchCycles; i++) {
var posStart = i * maxChars;
var mySrch = fullSearchTerm.substr(posStart, maxChars);
console.log( i +" mySrch: "+ mySrch);
var oSrch = {"searchterm":mySrch};
oSearchTerms.push(oSrch);
}
console.log("oSearchTerms.length: " + oSearchTerms.length +" oSearchTerm: "+ JSON.stringify(oSearchTerms));
// Begin search loop
// iterate over oSearchTerms, find and highlight each searchterm
for (var i = 0; i < oSearchTerms.length; i++) {
console.log("oSearchTerms["+i+"].searchterm: " + JSON.stringify(oSearchTerms[i].searchterm));
var searchResults = context.document.body.search(oSearchTerms[i].searchterm, { matchCase: true });
console.log("do context.sync() ");
context.load(searchResults);
return context.sync()
.then(function(){
console.log("done context.sync() ");
console.log("searchResults: "+ JSON.stringify(searchResults));
if(typeof searchResults.items !== undefined){
console.log("i: "+i+ " searchResults: "+searchResults.items.length);
// highlight each result
for (var j = 0; j < searchResults.items.length; j++) {
console.log("highlight searchResults.items["+j +"]");
searchResults.items[j].font.highlightColor = "red";
}
}
else{
console.log("typeof searchResults.items == undefined");
}
// return true;
// return context.sync();
});
//.then(context.sync);
//return true;
} // end search loop
})
.catch( function (error) {
console.log('findTextMatch Error: ' + JSON.stringify(error));
if (error instanceof OfficeExtension.Error) {
console.log('findTextMatch Debug info: ' + JSON.stringify(error));
}
});
}

I recommend that you not have a context.sync inside a loop. That can be a performance hit and it makes the code hard to reason about. Please see my answer to: Document not in sync after replace text and this sample: Word Add-in Stylechecker for a design pattern that avoids this. The pattern can be used with ES5 syntax if you want.
If you implement this pattern, you may find that the problem has gone away, or at least you will be able to see clearer where the cause might be.

Related

Is it necessary to create <span> elements to register event listeners

I have a working web app that reads local .txt files and displays the content in a div element. I create a span element out of each word because I need to be able to select any word in the document and create an EEI (Essential Elements of Information) from the text. I then register a click handler on the containing div and let the event bubble up. The three functions below show reading the file, and parsing it, and populating the text div with spans:
function readInputFile(evt) {
reset();
var theFile = evt.target.files[0];
if(theFile) {
$("#theDoc").empty(); //Clean up any old docs loaded
var myReader = new FileReader();
var ta = document.getElementById("theDoc");
myReader.onload = function(e) {
parseTheDoc(e.target.result);
initialMarkup();
};
myReader.readAsText(theFile);
} else {
alert("Can not read input file: readInputFile()");
}
}
function parseTheDoc(docContents) {
var lines = docContents.split("\n");
var sentWords =[];
for(var i = 0; i < lines.length; i++) {
sentWords = lines[i].split(" ");
words = words.concat(sentWords);
words.push("<br>");
}
//examineWords(words);
createSpans(words);
}
function createSpans() {
for (var i = 0; i < words.length; i++) {
var currentWord = words[i];
if(currentWord !== "<br>") {
var $mySpan = $("<span />");
$mySpan.text(currentWord + " ");
$mySpan.attr("id", "word_" + i);
$("#theDoc").append($mySpan);
buildDocVector(currentWord, i, $mySpan);
}
else {
var $myBreak = $("<br>");
$myBreak.attr("id", "word_" + i);
$("#theDoc").append($myBreak);
buildDocVector("br", i, $myBreak);
}
}
//console.log("CreateSpans: Debug");
}
So basically a simple fileReader, split on \n, then tokenize on white space. I then create a span for each word, and a br element for each \n. It's not beautiful, but it satisfies the requirement, and works. My question is, is there a more efficient way of doing this? It just seems expensive to create all these spans, but my requirement is to annotate the doc and map any selected word to a data model/ontology. I can't think of a way to allow the user to select any word, or combination of words (control click) and then perform operations on them. This works, but with large docs (100 pages) I start having performance/memory issues. I understand this is more a design question and may not be appropriate, but I'd really like to know if there are more performant solutions.

Word web addin load whole document from server header/footer

We are trying to load a word document from server using JavaScript. We send the document using a base64 encoding. With our current approach, only the body is loading using the function:
context.document.body.insertFileFromBase64(fileContent, "replace");
Unfortunately, the header and the footer are not loading. Is there another approach to load the whole document including body and footer?
the insertFile operation does not overwrite existing header/footers in the document.
According to my research, I saw this article for using insertFileFromBase64.The article says," if you use insertFileFromBase64 to insert the file it does have this blank page with header and footer." Did you have the same issue for this?
However, another article says it's a design issue. Userform will encode data and will create an appointment on Microsoft Outlook Calendar
The article provides approach:
function getFile(){
Office.context.document.getFileAsync(Office.FileType.Compressed, { sliceSize: 4194304 /*64 KB*/ },
function (result) {
if (result.status == "succeeded") {
// If the getFileAsync call succeeded, then
// result.value will return a valid File Object.
var myFile = result.value;
var sliceCount = myFile.sliceCount;
var slicesReceived = 0, gotAllSlices = true, docdataSlices = [];
console.log("File size:" + myFile.size + " #Slices: " + sliceCount);
// Get the file slices.
getSliceAsync(myFile, 0, sliceCount, gotAllSlices, docdataSlices, slicesReceived);
}
else {
app.showNotification("Error:", result.error.message);
}
});
}
function getSliceAsync(file, nextSlice, sliceCount, gotAllSlices, docdataSlices, slicesReceived) {
file.getSliceAsync(nextSlice, function (sliceResult) {
if (sliceResult.status == "succeeded") {
if (!gotAllSlices) { // Failed to get all slices, no need to continue.
return;
}
// Got one slice, store it in a temporary array.
// (Or you can do something else, such as
// send it to a third-party server.)
docdataSlices[sliceResult.value.index] = sliceResult.value.data;
if (++slicesReceived == sliceCount) {
// All slices have been received.
file.closeAsync();
onGotAllSlices(docdataSlices);
}
else {
getSliceAsync(file, ++nextSlice, sliceCount, gotAllSlices, docdataSlices, slicesReceived);
}
}
else {
gotAllSlices = false;
file.closeAsync();
console.log("getSliceAsync Error:", sliceResult.error.message);
}
});
}
function onGotAllSlices(docdataSlices) {
var docdata = [];
for (var i = 0; i < docdataSlices.length; i++) {
docdata = docdata.concat(docdataSlices[i]);
}
var fileContent = new String();
for (var j = 0; j < docdata.length; j++) {
fileContent += String.fromCharCode(docdata[j]);
}
var mybase64 = window.btoa(fileContent);
console.log("here is the base 64", mybase64);
// Now all the file content is stored in 'fileContent' variable,
// you can do something with it, such as print, fax...
}

Using Word.SearchOptions in an Office Add-In

I am developing an Office Add-In that processes the text of each paragraph of a Word document against data in JSON format and writes the result to a within the index.html file that is rendered in the Task Pane. This works fine. I am now trying to format the strings within the Word document that correspond to the hits for the keys in the JSON data.
I have a JS block in the head of the index.html file in which I call "Office.initialize" and define a variable based on the JSON data, and have utility functions related to the above functionality. After that comes a function in which I get the Word context and process the Word file paragraphs against the JSON data, and then try to search the Word paragraph itself in order to format the hits. In this last task I am trying to reproduce a snippet from Michael Mainer 1. But no formatting happens when I activate this function. Unfortunately I don't have access to a console since I am on a Mac, which makes it harder to debug.
I would be very appreciative of someone showing me where I'm going wrong.
`function tester() {
Word.run(function (context) {
// Create a proxy object for the document's paragraphs
var paragraphs = context.document.body.paragraphs;
// Load the paragraphs' text, which I run regexes on
context.load(paragraphs, 'text');
return context.sync().then(function () {
for (var i = 0; i < paragraphs.items.length; i++) {
var text = paragraphs.items[i].text;
// jquery to iterate over the "notes" objects from the JSON
$.each(notes, function(key, value) {
var regex = new RegExp("\\b" + key + "\\b", "g");
var res = regex.test(text);
// if the regex hits...
if (res == true) {
// This part works fine, using the JSON data to append to the <DIV> with ID = "notes"
document.getElementById('notes').innerHTML += "<button onclick=hide('" + value.seqNo + "')><b>" + key + "</b></button><p class='" + value.seqNo + "' id='" + i + "'>" + value.notes[0].note + "</p>";
// I now go on to searching for these hits within the current paragraph in the Word file
var thisPara = paragraphs.items[i];
// Set up the search options.
var options = Word.SearchOptions.newObject(context);
options.matchCase = false
// Queue the commmand to search the current paragraph for occurrences of the string "key" (coming from the JSON data)
var searchResults = paragraphs.items[i].search(key, options);
// Load 'text' and 'font' for searchResults.
context.load(searchResults, 'text, font');
// Synchronize the document state by executing the queued-up commands, and return a promise to indicate task completion.
return context.sync().then(function () {
// Queue a command to change the font for each found item.
for (var j = 0; j < searchResults.items.length; j++) {
searchResults.items[j].font.color = '#FF0000'
searchResults.items[j].font.highlightColor = '#FFFF00';
searchResults.items[j].font.bold = true;
}
// Synchronize the document state by executing the queued-up commands,
// and return a promise to indicate task completion.
return context.sync();
});
}
});
}
});
})
.catch(function (error) {
console.log('Error: ' + JSON.stringify(error));
if (error instanceof OfficeExtension.Error) {
console.log('Debug info: ' + JSON.stringify(error.debugInfo));
}
});
}
`
It looks like you just need access to the font property, have you tried just:
context.load(searchResults, 'font');
That was working for me?

Retrieve reference to comment OpenXML

I am trying to pull out the text from a Word document that is referenced by a comment in OpenXML. I can easily get the text of a comment, but not the paragraph text in the document that the comment is referencing.
The image I attached shows a comment and the related text. I am having a lot of trouble finding an example of how to get the referenced text. How can I get this text?
The solution is to get the Id of the comment which as you said you already know how to retrieve, and then search the document for a CommentRangeStart element with the same Id. When you have found it, you can loop over .NextSibling() until you hit a CommentRangeEnd element.
The elements between CommentRangeStart and CommentRangeEnd is the referenced part, which obviously can be multiple runs, paragraphs, images, whatever. So you will have to handle the collected elements somehow afterwards.
I made a test document looking like this:
I've made this code to test it:
using (var wordDoc = WordprocessingDocument.Open(#"c:\test\test.docx", true))
{
MainDocumentPart mainPart = wordDoc.MainDocumentPart;
var document = mainPart.Document;
var comments = mainPart.WordprocessingCommentsPart.Comments.ChildElements;
foreach(Comment comment in comments)
{
string commentId = comment.Id;
string commentText = comment.InnerText;
OpenXmlElement rangeStart = document.Descendants<CommentRangeStart>().Where(c => c.Id == commentId).FirstOrDefault();
List<OpenXmlElement> referenced = new List<OpenXmlElement>();
rangeStart = rangeStart.NextSibling();
while(!(rangeStart is CommentRangeEnd))
{
referenced.Add(rangeStart);
rangeStart = rangeStart.NextSibling();
}
Console.WriteLine("Comment Id " + commentId + " with text \"" + " " + commentText + "\" references =>");
foreach (var ele in referenced)
{
if(!string.IsNullOrWhiteSpace(ele.InnerText))
{
Console.WriteLine(" " + ele.InnerText);
}
}
}
Console.ReadKey();
}
Which produces this output
I hope it helps!
I could not get your solution to work. However I found a workaround.
OpenXmlElement rangeStart = document.Descendants<CommentRangeStart>().Where(c => c.Id == commentId).FirstOrDefault();
bool breakLoop = false;
rangeStart = rangeStart.Parent;
while (true) // Looping through items between commentRangeStart and commentRangeEnd.
{
if (rangeStart.NextSibling() == null)
{
break;
}
foreach (var ele in rangeStart.ChildElements)
{
if (!(ele is CommentRangeEnd))
{
if (!(string.IsNullOrWhiteSpace(ele.InnerText)))
{
referenced.Add(ele);
}
}
else
{
breakLoop = true;
}
if (breakLoop)
break;
}
rangeStart = rangeStart.NextSibling();
}
Hence, instead of looping through the paragraph in which the CommenRageStart exists, since one comment may be built up of several paragraphs, I use the parent node in order to trace back and forth between the paragraphs. Finnaly, as I reach the CommentRangeEnd I can break the loop and process the data however is required.

Is there a way to auto expand objects in Chrome Dev Tools?

EVERY SINGLE TIME I view an object in the console I am going to want to expand it, so it gets tiresome to have to click the arrow to do this EVERY SINGLE TIME :) Is there a shortcut or setting to have this done automatically?
Consider using console.table().
To expand / collapse a node and all its children,
Ctrl + Alt + Click or Opt + Click on arrow icon
(note that although the dev tools doc lists Ctrl + Alt + Click, on Windows all that is needed is Alt + Click).
While the solution mentioning JSON.stringify is pretty great for most of the cases, it has a few limitations
It can not handle items with circular references where as console.log can take care of such objects elegantly.
Also, if you have a large tree, then ability to interactively fold away some nodes can make exploration easier.
Here is a solution that solves both of the above by creatively (ab)using console.group:
function expandedLog(item, maxDepth = 100, depth = 0){
if (depth > maxDepth ) {
console.log(item);
return;
}
if (typeof item === 'object' && item !== null) {
Object.entries(item).forEach(([key, value]) => {
console.group(key + ' : ' +(typeof value));
expandedLog(value, maxDepth, depth + 1);
console.groupEnd();
});
} else {
console.log(item);
}
}
Now running:
expandedLog({
"glossary": {
"title": "example glossary",
"GlossDiv": {
"title": "S",
"GlossList": {
"GlossEntry": {
"ID": "SGML",
"SortAs": "SGML",
"GlossTerm": "Standard Generalized Markup Language",
"Acronym": "SGML",
"Abbrev": "ISO 8879:1986",
"GlossDef": {
"para": "A meta-markup language, used to create markup languages such as DocBook.",
"GlossSeeAlso": ["GML", "XML"]
},
"GlossSee": "markup"
}
}
}
}
})
Will give you something like:
The value of maxDepth can be adjusted to a desired level, and beyond that level of nesting - expanded log will fall back to usual console.log
Try running something like:
x = { a: 10, b: 20 }
x.x = x
expandedLog(x)
Also please note that console.group is non-standard.
Might not be the best answer, but I've been doing this somewhere in my code.
Update:
Use JSON.stringify to expand your object automatically:
> a = [{name: 'Joe', age: 5}, {name: 'John', age: 6}]
> JSON.stringify(a, true, 2)
"[
{
"name": "Joe",
"age": 5
},
{
"name": "John",
"age": 6
}
]"
You can always make a shortcut function if it hurts to type all that out:
j = function(d) {
return JSON.stringify(d, true, 2)
}
j(a)
Previous answer:
pretty = function(d)
{
var s = []
for (var k in d) {
s.push(k + ': ' + d[k])
}
console.log(s.join(', '))
}
then, instead of:
-> a = [{name: 'Joe', age: 5}, {name: 'John', age: 6}]
-> a
<- [Object, Object]
You do:
-> a.forEach(pretty)
<- name: Joe, age: 5
name: John, age: 6
Not the best solution, but works well for my usage. Deeper objects will not work so that's something that can be improved on.
option+Click on a Mac. Just discovered it now myself and have made my week! This has been as annoying as anything
By default the console on Chrome and Safari browsers will output objects which are collapsed, with sorted property keys, and include all inherited prototype chains.
I'm personally not a fan. Most developers need raw output of an object without the prototype chain, and anything else should be opt-in. Collapsed objects waste the developer's time, because they need to expand them, and if they wanted less output they could just log the property keys they need. Auto-sorting the property keys, leaves the developer without a way to check if their own sort works correctly, which could cause bugs. And lastly, the common Javascript developer does not spend much time working on the inherited prototype chain, so that adds noise to the logs.
How to expand objects in Console
Recommended
console.log(JSON.stringify({}, undefined, 2));
Could also use as a function:
console.json = object => console.log(JSON.stringify(object, undefined, 2));
console.json({});
"Option + Click" (Chrome on Mac) and "Alt + Click" (Chrome on Window)
However, it's not supported by all browsers (e.g. Safari), and Console still prints the prototype chains, auto-sorts property keys, etc.
Not Recommended
I would not recommend either of the top answers
console.table() - this is shallow expansion only, and does not expand nested objects
Write a custom underscore.js function - too much overhead for what should be a simple solution
Here is a modified version of lorefnon's answer which does not depend on underscorejs:
var expandedLog = (function(MAX_DEPTH){
return function(item, depth){
depth = depth || 0;
isString = typeof item === 'string';
isDeep = depth > MAX_DEPTH
if (isString || isDeep) {
console.log(item);
return;
}
for(var key in item){
console.group(key + ' : ' +(typeof item[key]));
expandedLog(item[key], depth + 1);
console.groupEnd();
}
}
})(100);
Here is my solution, a function that iterates an all the properties of the object, including arrays.
In this example I iterate over a simple multi-level object:
var point = {
x: 5,
y: 2,
innerobj : { innerVal : 1,innerVal2 : 2 },
$excludedInnerProperties : { test: 1},
includedInnerProperties : { test: 1}
};
You have also the possibility to exclude the iteration if the properties starts with a particular suffix (i.e. $ for angular objects)
discoverProperties = function (obj, level, excludePrefix) {
var indent = "----------------------------------------".substring(0, level * 2);
var str = indent + "level " + level + "\r\n";
if (typeof (obj) == "undefined")
return "";
for (var property in obj) {
if (obj.hasOwnProperty(property)) {
var propVal;
try {
propVal = eval('obj.' + property);
str += indent + property + "(" + propVal.constructor.name + "):" + propVal + "\r\n";
if (typeof (propVal) == 'object' && level < 10 && propVal.constructor.name != "Date" && property.indexOf(excludePrefix) != 0) {
if (propVal.hasOwnProperty('length')) {
for (var i = 0; i < propVal.length; i++) {
if (typeof (propVal) == 'object' && level < 10) {
if (typeof (propVal[i]) != "undefined") {
str += indent + (propVal[i]).constructor.name + "[" + i + "]\r\n";
str += this.discoverProperties(propVal[i], level + 1, excludePrefix);
}
}
else
str += indent + propVal[i].constructor.name + "[" + i + "]:" + propVal[i] + "\r\n";
}
}
else
str += this.discoverProperties(propVal, level + 1, excludePrefix);
}
}
catch (e) {
}
}
}
return str;
};
var point = {
x: 5,
y: 2,
innerobj : { innerVal : 1,innerVal2 : 2 },
$excludedInnerProperties : { test: 1},
includedInnerProperties : { test: 1}
};
document.write("<pre>" + discoverProperties(point,0,'$')+ "</pre>");
Here is the output of the function:
level 0
x(Number):5
y(Number):2
innerobj(Object):[object Object]
--level 1
--innerVal(Number):1
--innerVal2(Number):2
$excludedInnerProperties(Object):[object Object]
includedInnerProperties(Object):[object Object]
--level 1
--test(Number):1
You can also inject this function in any web page and copy and analyze all the properties, try in on the google page using the chrome command:
discoverProperties(google,0,'$')
Also you can copy the output of the command using the chrome command:
copy(discoverProperties(myvariable,0,'$'))
if you have a big object, JSON.stringfy will give error Uncaught TypeError: Converting circular structure to JSON
, here is trick to use modified version of it
JSON.stringifyOnce = function(obj, replacer, indent){
var printedObjects = [];
var printedObjectKeys = [];
function printOnceReplacer(key, value){
if ( printedObjects.length > 2000){ // browsers will not print more than 20K, I don't see the point to allow 2K.. algorithm will not be fast anyway if we have too many objects
return 'object too long';
}
var printedObjIndex = false;
printedObjects.forEach(function(obj, index){
if(obj===value){
printedObjIndex = index;
}
});
if ( key == ''){ //root element
printedObjects.push(obj);
printedObjectKeys.push("root");
return value;
}
else if(printedObjIndex+"" != "false" && typeof(value)=="object"){
if ( printedObjectKeys[printedObjIndex] == "root"){
return "(pointer to root)";
}else{
return "(see " + ((!!value && !!value.constructor) ? value.constructor.name.toLowerCase() : typeof(value)) + " with key " + printedObjectKeys[printedObjIndex] + ")";
}
}else{
var qualifiedKey = key || "(empty key)";
printedObjects.push(value);
printedObjectKeys.push(qualifiedKey);
if(replacer){
return replacer(key, value);
}else{
return value;
}
}
}
return JSON.stringify(obj, printOnceReplacer, indent);
};
now you can use JSON.stringifyOnce(obj)
Its a work around, but it works for me.
I use in the case where a control/widget auto updates depending on user actions. For example, when using twitter's typeahead.js, once you focus out of the window, the dropdown disappears and the suggestions get removed from the DOM.
In dev tools right click on the node you want to expand enable break on... -> subtree modifications, this will then send you to the debugger. Keep hitting F10 or Shift+F11 untill you dom mutates. Once that mutates then you can inspect. Since the debugger is active the UI of Chrome is locked and doesn't close the dropdown and the suggestions are still in the DOM.
Very handy when troubleshooting layout of dynamically inserted nodes that are begin inserted and removed constantly.
Another easier way would be
Use JSON.stringify(jsonObject)
Copy and Paste the result to Visual Studio Code
Use Ctrl+K and Ctrl+F to format the result
You will see formatted expanded object
I have tried this for simple objects.
You can package JSON.stringify into a new function eg
jsonLog = function (msg, d) {
console.log(msg + '\n' + JSON.stringify(d, true, 2))
}
then
jsonLog('root=', root)
FWIW.
Murray
For lazy folks
/**
* _Universal extensive multilevel logger for lazy folks_
* #param {any} value **`Value` you want to log**
* #param {number} tab **Abount of `tab`**
*/
function log(value, tab = 4) {
console.log(JSON.stringify(value, undefined, tab));
}
Usage
log(anything) // [] {} 1 true null
Alt-click will expand all child nodes in the Chrome console.
You could view your element by accessing document.getElementsBy... and then right click and copy of the resulted object. For example:
document.getElementsByTagName('ion-app') gives back javascript object that can be copy pasted to text editor and it does it in full.
Better yet: right click on the resulted element - 'Edit as html' - 'Select all' - 'Copy' - 'Paste'