How can I make dynamic header columns and dynamic data to export to Excel/CSV/PDF using MongoDB, Spring Boot, and Apache POI?

I want to build an export function using Spring Boot. My data lives in MongoDB (NoSQL), and I want to export my MongoDB documents dynamically using Apache POI (if there is a better dependency, feel free to recommend it).
I don't want to declare header columns, entity models, etc.; I want to export the data dynamically, exactly as it appears in my database documents. Can anyone give me an example?

Please try this code; it may help you.
First, add the Gson dependency to your pom.xml.
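For reference, the standard Gson coordinates look like this (the version is only an example; use a current one):
<dependency>
    <groupId>com.google.code.gson</groupId>
    <artifactId>gson</artifactId>
    <version>2.10.1</version>
</dependency>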
Controller code
MasterController.java
@Autowired
MasterServiceImpl masterServiceImpl;

@GetMapping(value = "/dynamicfile/{flag}/{fileType}/{fileName}")
public ResponseEntity<InputStreamResource> downloadsFiles(@PathVariable("flag") int flag,
        @PathVariable("fileType") String fileType,
        @PathVariable("fileName") String fileName) throws IOException {
    // flag selects which collection to export; 1 maps to the department data below.
    List<?> objects = new ArrayList<>();
    if (flag == 1) {
        objects = masterServiceImpl.getData();
    }
    ByteArrayInputStream in = masterServiceImpl.downloadsFiles(objects, fileType);
    // Pick the file extension that matches the requested format.
    HttpHeaders headers = new HttpHeaders();
    if (fileType.equals("Excel")) {
        headers.add("Content-Disposition", "attachment; filename=" + fileName + ".xlsx");
    } else if (fileType.equals("Pdf")) {
        headers.add("Content-Disposition", "attachment; filename=" + fileName + ".pdf");
    } else if (fileType.equals("Csv")) {
        headers.add("Content-Disposition", "attachment; filename=" + fileName + ".csv");
    }
    return ResponseEntity.ok().headers(headers).body(new InputStreamResource(in));
}
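With this in place, a request such as GET /dynamicfile/1/Excel/departments (the file name is just an example) downloads the collection as departments.xlsx.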
Service code:
MasterServiceImpl.java
public static List<HashMap<Object, Object>> getListOfObjectToListOfHashMap(List<?> objects) {
    List<HashMap<Object, Object>> list = new ArrayList<>();
    for (int i = 0; i < objects.size(); i++) {
        HashMap<Object, Object> map = new HashMap<>();
        map.put("SrNo", i + 1); // add a serial-number column first
        // Serialize the object to JSON, strip the outer braces, and split into "key:value" pairs.
        // Note: this naive split breaks if a value contains a comma or colon, or if the
        // document has nested objects; see the JsonObject-based alternative after this method.
        String json = new Gson().toJson(objects.get(i));
        String body = json.substring(1, json.length() - 1);
        String[] pairs = body.split(",");
        for (int j = 0; j < pairs.length; j++) {
            String key = pairs[j].split(":")[0];
            String value = pairs[j].split(":")[1];
            // Strip surrounding quotes from the key, and from string values.
            if (value.charAt(0) == '"') {
                map.put(key.substring(1, key.length() - 1), value.substring(1, value.length() - 1));
            } else {
                map.put(key.substring(1, key.length() - 1), value);
            }
        }
        list.add(map);
    }
    return list;
}
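// A sturdier alternative (my sketch, not part of the original answer): let Gson parse the
// JSON tree instead of splitting strings, so values containing commas or colons survive.
// Assumes Gson 2.8.6+ for JsonParser.parseString, plus imports of com.google.gson.JsonElement,
// com.google.gson.JsonObject, and com.google.gson.JsonParser. The method name is mine.
public static List<HashMap<Object, Object>> getListOfObjectToListOfHashMapSafe(List<?> objects) {
    List<HashMap<Object, Object>> list = new ArrayList<>();
    for (int i = 0; i < objects.size(); i++) {
        // LinkedHashMap keeps the field order of the document, which also keeps
        // the PDF/CSV columns aligned with the generated header.
        HashMap<Object, Object> map = new LinkedHashMap<>();
        map.put("SrNo", i + 1);
        JsonObject json = JsonParser.parseString(new Gson().toJson(objects.get(i))).getAsJsonObject();
        for (Map.Entry<String, JsonElement> entry : json.entrySet()) {
            JsonElement value = entry.getValue();
            map.put(entry.getKey(), value.isJsonPrimitive() ? value.getAsString() : value.toString());
        }
        list.add(map);
    }
    return list;
}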
public static ByteArrayInputStream downloadsFiles(List<?> objects, String fileType) throws IOException {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    List<HashMap<Object, Object>> list = getListOfObjectToListOfHashMap(objects);
    String[] COLUMNs = getColumnsNameFromListOfObject(objects);
    try {
        if (fileType.equals("Excel")) {
            generateExcel(list, COLUMNs, out);
        } else if (fileType.equals("Pdf")) {
            generatePdf(list, COLUMNs, out);
        } else if (fileType.equals("Csv")) {
            // The original answer called generatePdf here by mistake; CSV needs its
            // own writer (see the generateCsv sketch below).
            generateCsv(list, COLUMNs, out);
        }
    } catch (Exception ex) {
        System.out.println("Error occurred: " + ex);
    }
    return new ByteArrayInputStream(out.toByteArray());
}
public static final String[] getColumnsNameFromListOfObject(List<?> objects) {
    // Derive the header row from the first object's JSON keys (this assumes the list
    // is non-empty and that every document has the same fields).
    String json = new Gson().toJson(objects.get(0));
    String[] pairs = json.substring(1, json.length() - 1).split(",");
    String header = "SrNo";
    for (int i = 0; i < pairs.length; i++) {
        String key = pairs[i].split(":")[0];
        header = header + "," + key.substring(1, key.length() - 1);
    }
    return header.split(",");
}
public static final void generateExcel(List<HashMap<Object, Object>> list, String[] COLUMNs, ByteArrayOutputStream out) throws IOException {
    Workbook workbook = new XSSFWorkbook();
    Sheet sheet = workbook.createSheet("Sheet1");
    // Bold blue header style.
    Font headerFont = workbook.createFont();
    headerFont.setBold(true);
    headerFont.setColor(IndexedColors.BLUE.getIndex());
    CellStyle headerCellStyle = workbook.createCellStyle();
    headerCellStyle.setFont(headerFont);
    Row headerRow = sheet.createRow(0);
    for (int col = 0; col < COLUMNs.length; col++) {
        Cell cell = headerRow.createCell(col);
        cell.setCellValue(COLUMNs[col]);
        cell.setCellStyle(headerCellStyle);
    }
    // One row per document; match each map entry to its header column by name,
    // so the HashMap's iteration order does not matter here.
    int rowIdx = 1;
    for (int k = 0; k < list.size(); k++) {
        Row row = sheet.createRow(rowIdx++);
        for (Map.Entry<Object, Object> entry : list.get(k).entrySet()) {
            Object key = entry.getKey();
            Object value = entry.getValue();
            for (int col = 0; col < COLUMNs.length; col++) {
                if (key.toString().equals(COLUMNs[col])) {
                    row.createCell(col).setCellValue(value.toString());
                }
            }
        }
    }
    workbook.write(out);
    workbook.close();
}
private static final void generatePdf(List<HashMap<Object, Object>> list, String[] COLUMNs, ByteArrayOutputStream out) throws DocumentException {
    Document document = new Document();
    com.itextpdf.text.Font headerFont = FontFactory.getFont(FontFactory.HELVETICA_BOLD, 8);
    com.itextpdf.text.Font dataFont = FontFactory.getFont(FontFactory.TIMES_ROMAN, 8);
    // Header row.
    PdfPTable table = new PdfPTable(COLUMNs.length);
    for (int col = 0; col < COLUMNs.length; col++) {
        PdfPCell hcell = new PdfPCell(new Phrase(COLUMNs[col], headerFont));
        hcell.setHorizontalAlignment(Element.ALIGN_CENTER);
        table.addCell(hcell);
    }
    // Data rows. Note that cells are added in the map's iteration order, so with a plain
    // HashMap the columns may not line up with the headers; a LinkedHashMap avoids that.
    for (int index = 0; index < list.size(); index++) {
        for (Map.Entry<Object, Object> entry : list.get(index).entrySet()) {
            Object key = entry.getKey();
            Object value = entry.getValue();
            PdfPCell cell = null;
            for (int col = 0; col < COLUMNs.length; col++) {
                if (key.toString().equals(COLUMNs[col])) {
                    cell = new PdfPCell(new Phrase(value.toString(), dataFont));
                    cell.setVerticalAlignment(Element.ALIGN_MIDDLE);
                    cell.setHorizontalAlignment(Element.ALIGN_CENTER);
                }
            }
            table.addCell(cell);
        }
    }
    PdfWriter.getInstance(document, out);
    document.open();
    document.add(table);
    document.close();
}
// departmentRepository is assumed to be an @Autowired Mongo repository in this class.
public List<TblDepartment> getData() {
    List<TblDepartment> list = new ArrayList<>();
    departmentRepository.findAll().forEach(list::add);
    return list;
}
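The Csv branch above needs a generator of its own, which the original answer never showed. Here is a minimal sketch (my addition, using only java.io.PrintWriter, java.io.OutputStreamWriter, and java.nio.charset.StandardCharsets; the quoting is deliberately naive):
public static void generateCsv(List<HashMap<Object, Object>> list, String[] COLUMNs, ByteArrayOutputStream out) {
    PrintWriter writer = new PrintWriter(new OutputStreamWriter(out, StandardCharsets.UTF_8));
    // Header row, then one line per document, quoting every field so embedded
    // commas and quotes do not break the layout.
    writer.println(String.join(",", COLUMNs));
    for (HashMap<Object, Object> rowMap : list) {
        StringBuilder row = new StringBuilder();
        for (int col = 0; col < COLUMNs.length; col++) {
            if (col > 0) {
                row.append(',');
            }
            Object value = rowMap.get(COLUMNs[col]);
            row.append('"').append(value == null ? "" : value.toString().replace("\"", "\"\"")).append('"');
        }
        writer.println(row);
    }
    writer.flush();
}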

Related

How to replace the text of a Paragraph in iText?

I want to set the page number when I merge PDF files. The page-number paragraph is defined by someone else, with whatever custom style they want. Right now I can add text to the paragraph (like doc.add(paragraph.add(text))), but I cannot replace it.
public static byte[] mergePdf(Map<String, PdfDocument> filesToMerge, Paragraph paragraph) {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
PdfDocument pdfDoc = new PdfDocument(new PdfWriter(baos));
Document doc = new Document(pdfDoc);
pdfDoc.initializeOutlines();
PdfPageFormCopier formCopier = new PdfPageFormCopier();
int page = 1;
for (Map.Entry<String, PdfDocument> entry : filesToMerge.entrySet()) {
String title = entry.getKey();
PdfDocument srcDoc = entry.getValue();
int numberOfPages = srcDoc.getNumberOfPages();
for (int i = 1; i <= numberOfPages; i++, page++) {
Text text = new Text(String.format("page %d", page));
srcDoc.copyPagesTo(i, i, pdfDoc, formCopier);
if (i == 1) {
text.setDestination("p" + page);
PdfOutline rootOutLine = pdfDoc.getOutlines(false);
PdfOutline outline = rootOutLine.addOutline(title);
outline.addDestination(PdfDestination.makeDestination(new PdfString("p" + page)));
}
// I want to do something like "doc.add(paragraph.set(text))";
// the paragraph's position, font, font size, and so on are already set, so I don't want to create a new Paragraph(text)
doc.add(paragraph.add(text));
}
}
for (PdfDocument srcDoc : filesToMerge.values()) {
srcDoc.close();
}
doc.close();
return baos.toByteArray();
}
I am the questioner. In the end I resolved the question like this:
public class MyParagraph extends Paragraph {
public MyParagraph() {
}
public void setContent(Text text) {
List<IElement> children = this.getChildren();
if (!children.isEmpty()) {
children.clear();
}
children.add(text);
}
}
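With that subclass, the merge loop can keep reusing the single pre-styled paragraph. A short usage sketch, assuming paragraph was created as a MyParagraph:
Text text = new Text(String.format("page %d", page));
paragraph.setContent(text); // replaces the previous page's Text child
doc.add(paragraph);         // position, font, size, etc. were configured once up front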

Creating a custom plugin for Chinese tokenization

I'm working towards properly integrating the Stanford segmenter within Solr for Chinese tokenization.
This plugin involves loading other jar files and model files. I've got it working in a crude manner by hardcoding the complete paths to the files.
I'm looking for a way to build the plugin so the paths need not be hardcoded, and to have the plugin conform to the Solr plugin architecture. Please let me know if there are any recommended sites or tutorials for this.
I've added my code below:
public class ChineseTokenizerFactory extends TokenizerFactory {
/** Creates a new ChineseTokenizerFactory */
public ChineseTokenizerFactory(Map<String,String> args) {
super(args);
assureMatchVersion();
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
}
@Override
public ChineseTokenizer create(AttributeFactory factory, Reader input) {
Reader processedStringReader = new ProcessedStringReader(input);
return new ChineseTokenizer(luceneMatchVersion, factory, processedStringReader);
}
}
public class ProcessedStringReader extends java.io.Reader {
private static final int BUFFER_SIZE = 1024 * 8;
//private static TextProcess m_textProcess = null;
private static final String basedir = "/home/praveen/PDS_Meetup/solr-4.9.0/custom_plugins/";
static Properties props = null;
static CRFClassifier<CoreLabel> segmenter = null;
private char[] m_inputData = null;
private int m_offset = 0;
private int m_length = 0;
public ProcessedStringReader(Reader input){
char[] arr = new char[BUFFER_SIZE];
StringBuffer buf = new StringBuffer();
int numChars;
if(segmenter == null)
{
segmenter = new CRFClassifier<CoreLabel>(getProperties());
segmenter.loadClassifierNoExceptions(basedir + "ctb.gz", getProperties());
}
try {
while ((numChars = input.read(arr, 0, arr.length)) > 0) {
buf.append(arr, 0, numChars);
}
} catch (IOException e) {
e.printStackTrace();
}
m_inputData = processText(buf.toString()).toCharArray();
m_offset = 0;
m_length = m_inputData.length;
}
@Override
public int read(char[] cbuf, int off, int len) throws IOException {
int charNumber = 0;
// Copy from the processed buffer into cbuf starting at off, as the Reader contract
// requires (the original indexed cbuf from 0 and misapplied off to the input side).
for (int i = m_offset; i < m_length && charNumber < len; i++) {
cbuf[off + charNumber] = m_inputData[i];
m_offset++;
charNumber++;
}
if (charNumber == 0) {
return -1;
}
return charNumber;
}
@Override
public void close() throws IOException {
m_inputData = null;
m_offset = 0;
m_length = 0;
}
public String processText(String inputText)
{
List<String> segmented = segmenter.segmentString(inputText);
String output = "";
if(segmented.size() > 0)
{
output = segmented.get(0);
for(int i=1;i<segmented.size();i++)
{
output = output + " " +segmented.get(i);
}
}
System.out.println(output);
return output;
}
static Properties getProperties()
{
if (props == null) {
props = new Properties();
props.setProperty("sighanCorporaDict", basedir);
// props.setProperty("NormalizationTable", "data/norm.simp.utf8");
// props.setProperty("normTableEncoding", "UTF-8");
// below is needed because CTBSegDocumentIteratorFactory accesses it
props.setProperty("serDictionary",basedir+"dict-chris6.ser.gz");
props.setProperty("inputEncoding", "UTF-8");
props.setProperty("sighanPostProcessing", "true");
}
return props;
}
}
public final class ChineseTokenizer extends CharTokenizer {
public ChineseTokenizer(Version matchVersion, Reader in) {
super(matchVersion, in);
}
public ChineseTokenizer(Version matchVersion, AttributeFactory factory, Reader in) {
super(matchVersion, factory, in);
}
/** Collects only characters which do not satisfy
* {@link Character#isWhitespace(int)}.*/
@Override
protected boolean isTokenChar(int c) {
return !Character.isWhitespace(c);
}
}
You can pass the argument through the Factory's args parameter.
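For example (a sketch: it assumes you add a basedir attribute to the <tokenizer .../> element in schema.xml and thread it through to ProcessedStringReader), the factory constructor can consume the path from args instead of hardcoding it:
private final String basedir;

public ChineseTokenizerFactory(Map<String,String> args) {
    super(args);
    assureMatchVersion();
    // Consume the custom attribute before the unknown-parameter check runs.
    basedir = args.remove("basedir");
    if (basedir == null) {
        throw new IllegalArgumentException("Missing required parameter: basedir");
    }
    if (!args.isEmpty()) {
        throw new IllegalArgumentException("Unknown parameters: " + args);
    }
}
create() can then hand basedir to ProcessedStringReader (and on to getProperties()) in place of the hardcoded constant.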

Copying fields in iTextSharp 5.4.5.0

I was under the impression that it is now possible to copy AcroFields using PdfCopy. In the release notes for iText 5.4.4.0 this is listed as possible. However, when I try to do so, it appears all the annotations (I think I am using that term correctly, still fairly new to iText...) for the fields are stripped out. It looks like the fields are there (meaning I can see the blue boxes that indicate an editable field), but they are not editable. If I bring the PDF up in Acrobat, I get a message saying that "there are no fields, would you like Acrobat to discover them?" and most are found and marked as fields properly (check boxes aren't, but the text fields are).
I assume there is an additional step somewhere along the lines to re-add the annotations to the PdfCopy object, but I do not see a way to get the annotations from the PdfReader. I also cannot seem to find any documentation on how to do this (since AcroFields were for so long not supported in PdfCopy most of what I find is along that vein).
Due to sensitivity I cannot provide a copy of the PDFs in question, but using an altered version of an earlier test program you can see the issue with the following code. It should generate a table with some check boxes in the four right columns. If I use the exact same code with PdfCopyFields in the MergePdfs method instead of PdfCopy, it works as expected. This code does not produce any text fields, but in my main project they are part of the original parent PDF that is used as a template.
(Sorry for the long example; it has been cherry-picked from a much larger application. You will need a PDF with a field named "TableStartPosition" somewhere in it, and you must update RunTest with the correct paths for your local machine to get this to work.)
Has the PdfCopy functionality not made it into iTextSharp yet? I am using version 5.4.5.0.
class Program
{
Stream _pdfTemplateStream;
MemoryStream _pdfResultStream;
PdfReader _pdfTemplateReader;
PdfStamper _pdfResultStamper;
static void Main(string[] args)
{
Program p = new Program();
try
{
p.RunTest();
}
catch (Exception f)
{
Console.WriteLine(f.Message);
Console.ReadLine();
}
}
internal void RunTest()
{
FileStream fs = File.OpenRead(@"C:\temp\a\RenameFieldTest\RenameFieldTest\Library\CoverPage.pdf");
_pdfTemplateStream = fs;
_pdfResultStream = new MemoryStream();
//PDFTemplateStream = new FileStream(_templatePath, FileMode.Open);
_pdfTemplateReader = new PdfReader(_pdfTemplateStream);
_pdfResultStamper = new PdfStamper(_pdfTemplateReader, _pdfResultStream);
#region setup objects
List<CustomCategory> Categories = new List<CustomCategory>();
CustomCategory c1 = new CustomCategory();
c1.CategorySizesInUse.Add(CustomCategory.AvailableSizes[1]);
c1.CategorySizesInUse.Add(CustomCategory.AvailableSizes[2]);
Categories.Add(c1);
CustomCategory c2 = new CustomCategory();
c2.CategorySizesInUse.Add(CustomCategory.AvailableSizes[0]);
c2.CategorySizesInUse.Add(CustomCategory.AvailableSizes[1]);
Categories.Add(c2);
List<CustomObject> Items = new List<CustomObject>();
CustomObject co1 = new CustomObject();
co1.Category = c1;
co1.Title = "Object 1";
Items.Add(co1);
CustomObject co2 = new CustomObject();
co2.Category = c2;
co2.Title = "Object 2";
Items.Add(co2);
#endregion
FillCoverPage(Items);
_pdfResultStamper.Close();
_pdfTemplateReader.Close();
List<MemoryStream> pdfStreams = new List<MemoryStream>();
pdfStreams.Add(new MemoryStream(_pdfResultStream.ToArray()));
MergePdfs(@"C:\temp\a\RenameFieldTest\RenameFieldTest\Library\Outfile.pdf", pdfStreams);
_pdfResultStream.Dispose();
_pdfTemplateStream.Dispose();
}
internal void FillCoverPage(List<CustomObject> Items)
{
//Before we start we need to figure out where to start adding the table
var fieldPositions = _pdfResultStamper.AcroFields.GetFieldPositions("TableStartPosition");
if (fieldPositions == null)
{ throw new Exception("Could not find the TableStartPosition field. Unable to determine point of origin for the table!"); }
_pdfResultStamper.AcroFields.RemoveField("TableStartPosition");
var fieldPosition = fieldPositions[0];
// Get the position of the field
var targetPosition = fieldPosition.position;
//First, get all the available card sizes
List<string> availableSizes = CustomCategory.AvailableSizes;
//Generate a table with the number of available card sizes + 1 for the device name
PdfPTable table = new PdfPTable(availableSizes.Count + 1);
float[] columnWidth = new float[availableSizes.Count + 1];
for (int y = 0; y < columnWidth.Length; y++)
{
if (y == 0)
{ columnWidth[y] = 320; }
else
{ columnWidth[y] = 120; }
}
table.SetTotalWidth(columnWidth);
table.WidthPercentage = 100;
PdfContentByte canvas;
List<PdfFormField> checkboxes = new List<PdfFormField>();
//Build the header row
table.Rows.Add(new PdfPRow(this.GetTableHeaderRow(availableSizes)));
//Insert the global check boxes
PdfPCell[] globalRow = new PdfPCell[availableSizes.Count + 1];
Phrase tPhrase = new Phrase("Select/Unselect All");
PdfPCell tCell = new PdfPCell();
tCell.BackgroundColor = BaseColor.LIGHT_GRAY;
tCell.AddElement(tPhrase);
globalRow[0] = tCell;
for (int x = 0; x < availableSizes.Count; x++)
{
tCell = new PdfPCell();
tCell.BackgroundColor = BaseColor.LIGHT_GRAY;
PdfFormField f = PdfFormField.CreateCheckBox(_pdfResultStamper.Writer);
string fieldName = string.Format("InkSaver.Global.chk{0}", availableSizes[x].Replace(".", ""));
//f.FieldName = fieldName;
string js = string.Format("hideAll(event.target, '{0}');", availableSizes[x].Replace(".", ""));
f.Action = PdfAction.JavaScript(js, _pdfResultStamper.Writer);
tCell.CellEvent = new ChildFieldEvent(_pdfResultStamper.Writer, f, fieldName);
globalRow[x + 1] = tCell;
checkboxes.Add(f);
}
table.Rows.Add(new PdfPRow(globalRow));
int status = 0;
int pageNum = 1;
for (int itemIndex = 0; itemIndex < Items.Count; itemIndex++)
{
tCell = new PdfPCell();
Phrase p = new Phrase(Items[itemIndex].Title);
tCell.AddElement(p);
tCell.HorizontalAlignment = Element.ALIGN_LEFT;
PdfPCell[] cells = new PdfPCell[availableSizes.Count + 1];
cells[0] = tCell;
for (int availCardSizeIndex = 0; availCardSizeIndex < availableSizes.Count; availCardSizeIndex++)
{
if (Items[itemIndex].Category.CategorySizesInUse.Contains(availableSizes[availCardSizeIndex]))
{
string str = availableSizes[availCardSizeIndex];
tCell = new PdfPCell();
tCell.PaddingLeft = 10f;
tCell.PaddingRight = 10f;
cells[availCardSizeIndex + 1] = tCell;
cells[availCardSizeIndex].HorizontalAlignment = Element.ALIGN_CENTER;
PdfFormField f = PdfFormField.CreateCheckBox(_pdfResultStamper.Writer);
string fieldName = string.Format("InkSaver.chk{0}.{1}", availableSizes[availCardSizeIndex].Replace(".", ""), itemIndex + 1);
//f.FieldName = fieldName; <-- This causes the checkbox to be double-named (i.e. InkSaver.Global.chk0.InkSaver.Global.chk0)
string js = string.Format("hideCardSize(event.target, {0}, '{1}');", itemIndex + 1, availableSizes[availCardSizeIndex]);
f.Action = PdfAction.JavaScript(js, _pdfResultStamper.Writer);
tCell.CellEvent = new ChildFieldEvent(_pdfResultStamper.Writer, f, fieldName);
checkboxes.Add(f);
}
else
{
//Add a blank cell
tCell = new PdfPCell();
cells[availCardSizeIndex + 1] = tCell;
}
}
//Test if the column text will fit
table.Rows.Add(new PdfPRow(cells));
canvas = _pdfResultStamper.GetUnderContent(pageNum);
ColumnText ct2 = new ColumnText(canvas);
ct2.AddElement(new PdfPTable(table));
ct2.Alignment = Element.ALIGN_LEFT;
ct2.SetSimpleColumn(targetPosition.Left, 0, targetPosition.Right, targetPosition.Top, 0, 0);
status = ct2.Go(true);
if ((status != ColumnText.NO_MORE_TEXT) || (itemIndex == (Items.Count - 1)))
{
ColumnText ct3 = new ColumnText(canvas);
ct3.AddElement(table);
ct3.Alignment = Element.ALIGN_LEFT;
ct3.SetSimpleColumn(targetPosition.Left, 0, targetPosition.Right, targetPosition.Top, 0, 0);
ct3.Go();
foreach (PdfFormField f in checkboxes)
{
_pdfResultStamper.AddAnnotation(f, pageNum);
}
checkboxes.Clear();
if (itemIndex < (Items.Count - 1))
{
pageNum++;
_pdfResultStamper.InsertPage(pageNum, _pdfTemplateReader.GetPageSize(1));
table = new PdfPTable(availableSizes.Count + 1);
table.SetTotalWidth(columnWidth);
table.WidthPercentage = 100;
table.Rows.Add(new PdfPRow(this.GetTableHeaderRow(availableSizes)));
}
}
}
}
private PdfPCell[] GetTableHeaderRow(List<string> AvailableSizes)
{
PdfPCell[] sizeHeaders = new PdfPCell[AvailableSizes.Count + 1];
Phrase devName = new Phrase("Device Name");
PdfPCell deviceHeader = new PdfPCell(devName);
deviceHeader.HorizontalAlignment = Element.ALIGN_CENTER;
deviceHeader.BackgroundColor = BaseColor.GRAY;
sizeHeaders[0] = deviceHeader;
for (int x = 0; x < AvailableSizes.Count; x++)
{
PdfPCell hCell = new PdfPCell(new Phrase(AvailableSizes[x]));
hCell.HorizontalAlignment = Element.ALIGN_CENTER;
hCell.BackgroundColor = BaseColor.GRAY;
sizeHeaders[x + 1] = hCell;
}
return sizeHeaders;
}
public void MergePdfs(string filePath, List<MemoryStream> pdfStreams)
{
//Create output stream
FileStream outStream = new FileStream(filePath, FileMode.Create);
Document document = null;
if (pdfStreams.Count > 0)
{
try
{
int PageCounter = 0;
//Create Main reader
PdfReader reader = new PdfReader(pdfStreams[0]);
PageCounter = reader.NumberOfPages;//This is if we have multiple pages in the cover page, we need to adjust the offset.
//rename fields in the PDF. This is required because PDF's cannot have more than one field with the same name
RenameFields(reader, PageCounter++);
//Create Main Doc
document = new Document(reader.GetPageSizeWithRotation(1));
//Create main writer
PdfCopy Writer = new PdfCopy(document, outStream);
//PdfCopyFields Writer = new PdfCopyFields(outStream);
//Open document for writing
document.Open();
////Add pages
Writer.AddDocument(reader);
//For each additional pdf after first combine them into main document
foreach (var PdfStream in pdfStreams.Skip(1))
{
PdfReader reader2 = new PdfReader(PdfStream);
//rename PDF fields
RenameFields(reader2, PageCounter++);
// Add content (note: the originally posted code added "reader" again here by mistake)
Writer.AddDocument(reader2);
}
//Writer.AddJavaScript(PostProcessing.GetSuperscriptJavaScript());
Writer.Close();
}
catch (Exception ex)
{
Console.WriteLine(ex.ToString());
}
finally
{
if (document != null)
document.Close();
foreach (var Strm in pdfStreams)
{
try { if (null != Strm) Strm.Dispose(); }
catch { }
}
//pdfStamper.Close();
outStream.Close();
}
}
}
private void RenameFields(PdfReader reader, int PageNum)
{
int tempPageNum = 1;
//rename all fields
foreach (string field in reader.AcroFields.Fields.Keys)
{
if (((reader.AcroFields.GetFieldType(field) == 1) || (reader.AcroFields.GetFieldType(field) == 2)) && (field.StartsWith("InkSaver")))
{
//This is a InkSaver button, set the name so its subclassed
string classPath;
if (reader.AcroFields.GetFieldType(field) == 2)
{
classPath = field.Substring(0, field.LastIndexOf("."));
if (field.StartsWith("InkSaver.chk"))
{
int a = field.LastIndexOf(".");
string sub = field.Substring(a + 1, (field.Length - a - 1));
int pageNum = int.Parse(sub);
int realPageNum = pageNum + tempPageNum;//PostProcessing.Instance.CoverPageLength;
PageNum = realPageNum;
}
}
else
{
classPath = field.Substring(0, field.LastIndexOf("."));
}
string newID = classPath + ".page" + PageNum.ToString();
bool ret = reader.AcroFields.RenameField(field, newID);
}
else
{
reader.AcroFields.RenameField(field, field + "_" + PageNum.ToString());// field + Guid.NewGuid().ToString("N"));
}
}
}
}
public class ChildFieldEvent : IPdfPCellEvent
{
protected PdfWriter writer;
protected PdfFormField parent;
protected string checkBoxName;
internal ChildFieldEvent(PdfWriter writer, PdfFormField parent, string CheckBoxName)
{
this.writer = writer;
this.parent = parent;
this.checkBoxName = CheckBoxName;
}
public void CellLayout(PdfPCell cell, Rectangle rect, PdfContentByte[] cb)
{
createCheckboxField(rect);
}
private void createCheckboxField(Rectangle rect)
{
RadioCheckField bt = new RadioCheckField(this.writer, rect, this.checkBoxName, "Yes");
bt.CheckType = RadioCheckField.TYPE_SQUARE;
bt.Checked = true;
this.parent.AddKid(bt.CheckField);
}
}
internal class CustomCategory
{
internal static List<string> AvailableSizes
{
get
{
List<string> retVal = new List<string>();
retVal.Add("1");
retVal.Add("2");
retVal.Add("3");
retVal.Add("4");
return retVal;
}
}
internal CustomCategory()
{
CategorySizesInUse = new List<string>();
}
internal List<string> CategorySizesInUse { get; set; }
}
internal class CustomObject
{
internal string Title { get; set; }
internal CustomCategory Category { get;set; }
}
Please take a look at the MergeForms example. Your example is too long for me to read, but at first sight, I'm missing the following line:
copy.setMergeFields();
By the way, in MergeForms2, the fields are also renamed before the form is merged.
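For reference, the shape of that example (a sketch following iText 5's MergeForms sample; dest and readers are placeholders, and in iTextSharp the call is spelled SetMergeFields):
Document document = new Document();
PdfCopy copy = new PdfCopy(document, new FileOutputStream(dest));
copy.setMergeFields(); // must be called before adding documents, or the fields are stripped
document.open();
for (PdfReader reader : readers) {
    copy.addDocument(reader);
}
document.close();
for (PdfReader reader : readers) {
    reader.close();
}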

How to implement boolean retrieval using a HitCollector in the scenario below

I am running my code on TREC documents and am currently implementing a scoring scheme to get the number of relevant documents. Now, however, I want to implement boolean retrieval, and I am trying to use a HitCollector.
Below is my code.
public class BatchSearch {
private BatchSearch() {}
/** Simple command-line based search demo. */
public static void main(String[] args) throws Exception {
String usage =
"Usage:\tjava BatchSearch [-index dir] [-simfn similarity] [-field f] [-queries file]";
if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) {
System.out.println(usage);
System.out.println("Supported similarity functions:\ndefault: DefaultSimilary (tfidf)\n");
System.exit(0);
}
String index = "index";
String field = "contents";
String queries = null;
String simstring = "default";
for(int i = 0;i < args.length;i++) {
if ("-index".equals(args[i])) {
index = args[i+1];
i++;
} else if ("-field".equals(args[i])) {
field = args[i+1];
i++;
} else if ("-queries".equals(args[i])) {
queries = args[i+1];
i++;
} else if ("-simfn".equals(args[i])) {
simstring = args[i+1];
i++;
}
}
Similarity simfn = null;
if ("default".equals(simstring)) {
simfn = new DefaultSimilarity();
} else if ("bm25".equals(simstring)) {
simfn = new BM25Similarity();
} else if ("dfr".equals(simstring)) {
simfn = new DFRSimilarity(new BasicModelP(), new AfterEffectL(), new NormalizationH2());
} else if ("lm".equals(simstring)) {
simfn = new LMDirichletSimilarity();
}
if (simfn == null) {
System.out.println(usage);
System.out.println("Supported similarity functions:\ndefault: DefaultSimilary (tfidf)");
System.out.println("bm25: BM25Similarity (standard parameters)");
System.out.println("dfr: Divergence from Randomness model (PL2 variant)");
System.out.println("lm: Language model, Dirichlet smoothing");
System.exit(0);
}
IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index)));
IndexSearcher searcher = new IndexSearcher(reader);
searcher.setSimilarity(simfn);
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_41);
BufferedReader in = null;
if (queries != null) {
in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8"));
} else {
in = new BufferedReader(new InputStreamReader(new FileInputStream("queries"), "UTF-8"));
}
QueryParser parser = new QueryParser(Version.LUCENE_41, field, analyzer);
while (true) {
String line = in.readLine();
if (line == null) {
break;
}
line = line.trim();
if (line.length() == 0) {
break;
}
String[] pair = line.split(" ", 2);
Query query = parser.parse(pair[1]);
doBatchSearch(in, searcher, pair[0], query, simstring);
}
reader.close();
}
/**
* This function performs a top-1000 search for the query as a basic TREC run.
*/
public static void doBatchSearch(BufferedReader in, IndexSearcher searcher, String qid, Query query, String runtag)
throws IOException {
// Collect the top 1000 documents for this query.
TopDocs results = searcher.search(query, 1000);
ScoreDoc[] hits = results.scoreDocs;
HashMap<String, String> seen = new HashMap<String, String>(1000);
int numTotalHits = results.totalHits;
int start = 0;
int end = Math.min(numTotalHits, 1000);
for (int i = start; i < end; i++) {
Document doc = searcher.doc(hits[i].doc);
String docno = doc.get("docno");
// There are duplicate document numbers in the FR collection, so only output a given
// docno once.
if (seen.containsKey(docno)) {
continue;
}
seen.put(docno, docno);
System.out.println(qid+" Q0 "+docno+" "+i+" "+hits[i].score+" "+runtag);
}
}
}
The scoring is done in doBatchSearch, and now I want to implement a HitCollector there.
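In Lucene 4.x the old HitCollector class no longer exists; its replacement is org.apache.lucene.search.Collector. A minimal unscored collector for boolean retrieval might look like this sketch (BooleanCollector is a name I made up):
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.Scorer;

// Collects every matching document ID without computing scores,
// which is all that boolean (set-based) retrieval needs.
public class BooleanCollector extends Collector {
    private final List<Integer> docIds = new ArrayList<Integer>();
    private int docBase;

    @Override
    public void setScorer(Scorer scorer) {
        // Ignored: nothing is ranked, so no scorer is needed.
    }

    @Override
    public void collect(int doc) {
        docIds.add(docBase + doc); // remap segment-local IDs to index-wide IDs
    }

    @Override
    public void setNextReader(AtomicReaderContext context) {
        docBase = context.docBase;
    }

    @Override
    public boolean acceptsDocsOutOfOrder() {
        return true; // order does not matter when nothing is scored
    }

    public List<Integer> getDocIds() {
        return docIds;
    }
}
Pass it with searcher.search(query, collector); and then read collector.getDocIds() to emit one line per matching document, mirroring the loop in doBatchSearch.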

How do I convert text files to .arff format (Weka)?

Please advise me on how to convert text files to .arff format (Weka), because I want to do data clustering for 1000 txt files.
There are some converters implemented in Weka; just find the right format, or make small changes to your data first (using awk, sed, ...).
Here is the API page related to this topic: http://weka.sourceforge.net/doc.stable/weka/core/converters/package-summary.html
For example, here is how to convert from CSV to ARFF:
java weka.core.converters.CSVLoader filename.csv > filename.arff
Here is the code you can use:
package text.Classification;
import java.io.*;
import weka.core.*;
public class TextDirectoryToArff {
public Instances createDataset(String directoryPath) throws Exception {
FastVector atts;
FastVector attVals;
atts = new FastVector();
atts.addElement(new Attribute("contents", (FastVector) null));
String[] s = { "class1", "class2", "class3" };
attVals = new FastVector();
for (String p : s)
attVals.addElement(p);
atts.addElement(new Attribute("class", attVals));
Instances data = new Instances("MyRelation", atts, 0);
System.out.println(data);
InputStreamReader is = null;
File dir = new File(directoryPath);
String[] files = dir.list();
for (int i = 0; i < files.length; i++) {
if (files[i].endsWith(".txt")) {
double[] newInst = new double[2];
File txt = new File(directoryPath + File.separator + files[i]);
is = new InputStreamReader(new FileInputStream(txt));
StringBuffer txtStr = new StringBuffer();
int c;
while ((c = is.read()) != -1) {
txtStr.append((char) c);
}
is.close(); // close each file once it has been read
newInst[0] = data.attribute(0).addStringValue(txtStr.toString());
// Cycle through every class label; the original "i % (s.length - 1)" could
// never assign the last class in the list.
int j = i % s.length;
newInst[1] = attVals.indexOf(s[j]);
data.add(new Instance(1.0, newInst));
}
}
return data;
}
public static void main(String[] args) {
TextDirectoryToArff tdta = new TextDirectoryToArff();
try {
Instances dataset = tdta.createDataset("/home/asadul/Desktop/Downloads/text_example/class5");
PrintWriter fileWriter = new PrintWriter("/home/asadul/Desktop/Downloads/text_example/abc.arff", "UTF-8");
fileWriter.println(dataset);
fileWriter.close();
} catch (Exception e) {
System.err.println(e.getMessage());
e.printStackTrace();
}
}
}