In Adobe Reader, the first pages of an e-book can have Roman-numeral page numbers, as shown in the attached image below.
Image : http://i.stack.imgur.com/GSm0Q.jpg
I would like to read these page numbers (not the indexed page numbers) with iText, but I don't know which properties (labels, annotations, ...) I should use. I can already open the file with PdfReader and loop through all pages, but I have no idea what I should access to get these Roman numerals.
// Open the source PDF read-only: FileMode.Open on its own asks for read/write
// access, which fails for read-only files even though nothing is written here.
using (Stream pdfStream = new FileStream(sourceFileName, FileMode.Open, FileAccess.Read))
{
    PdfReader pdfReader = new PdfReader(pdfStream);
    try
    {
        // iText page numbers are 1-based, hence the loop from 1 to NumberOfPages inclusive.
        for (int index = 1; index <= pdfReader.NumberOfPages; index++)
        {
            // Per-page processing goes here.
        }
    }
    finally
    {
        // Release the reader even if the per-page processing throws.
        pdfReader.Close();
    }
}
Thanks.
You are looking for the PageLabelExample. In this example, we have a PDF, page_labels.pdf that has pages numbered like this:
In the listPageLabels() method, we create a txt file with all the page labels:
/**
 * Writes the page label of every page of a PDF to a plain-text file,
 * one label per line. Nothing is written when the label array is null.
 *
 * @param src  path of the existing PDF
 * @param dest path of the text file to create
 * @throws IOException if the PDF cannot be read or the text file cannot be written
 */
public void listPageLabels(String src, String dest) throws IOException {
    PdfReader reader = new PdfReader(src);
    try {
        // no PDF, just a text file
        PrintStream out = new PrintStream(new FileOutputStream(dest));
        try {
            String[] labels = PdfPageLabels.getPageLabels(reader);
            // Guard against a missing label array instead of failing on labels.length.
            if (labels != null) {
                for (int i = 0; i < labels.length; i++) {
                    out.println(labels[i]);
                }
            }
            out.flush();
        } finally {
            // Close the text file even when reading the labels fails.
            out.close();
        }
    } finally {
        reader.close();
    }
}
The result looks like this:
A
B
1
2
3
Movies-4
Movies-5
Movies-6
Movies-7
Movies-8
If you want an iTextSharp example, take a look at this method:
/// <summary>
/// Reads the page labels from an existing PDF and returns them as text,
/// one label per line.
/// </summary>
/// <param name="src">The bytes of the existing PDF.</param>
/// <returns>
/// The labels, each followed by a line break; an empty string when
/// <c>PdfPageLabels.GetPageLabels</c> returns <c>null</c>.
/// </returns>
public string ListPageLabels(byte[] src) {
    StringBuilder sb = new StringBuilder();
    PdfReader reader = new PdfReader(src);
    try {
        String[] labels = PdfPageLabels.GetPageLabels(reader);
        // Guard against a missing label array instead of failing on labels.Length.
        if (labels != null) {
            foreach (String label in labels) {
                sb.AppendLine(label);
            }
        }
    }
    finally {
        // The reader was previously never closed.
        reader.Close();
    }
    return sb.ToString();
}
Update
As promised in the comment section: PdfPageLabels.cs
I am not a C# developer, but this is a quick and dirty version of the GetPageLabels() method that doesn't add a prefix:
/// <summary>
/// Builds the label of every page from the catalog's /PageLabels number tree.
/// Label prefixes are ignored: only the numeric part is produced.
/// </summary>
/// <param name="reader">Reader for the PDF whose labels are wanted.</param>
/// <returns>
/// One label per page, indexed from 0, or <c>null</c> when the catalog has no
/// /PageLabels entry.
/// </returns>
public static String[] GetPageLabels(PdfReader reader) {
    int pageTotal = reader.NumberOfPages;
    PdfDictionary catalog = reader.Catalog;
    PdfDictionary labelTree = (PdfDictionary)PdfReader.GetPdfObjectRelease(catalog.Get(PdfName.PAGELABELS));
    if (labelTree == null) {
        return null;
    }

    String[] result = new String[pageTotal];
    Dictionary<int, PdfObject> ranges = PdfNumberTree.ReadTree(labelTree);

    // Running state: the number to print on the current page and the active
    // numbering style. Both are reset whenever a page starts a new range.
    int number = 1;
    char style = 'D';

    for (int page = 0; page < pageTotal; page++) {
        PdfObject rangeEntry;
        if (ranges.TryGetValue(page, out rangeEntry)) {
            PdfDictionary range = (PdfDictionary)PdfReader.GetPdfObjectRelease(rangeEntry);
            // /St gives the first number of the range; it defaults to 1.
            number = range.Contains(PdfName.ST)
                ? ((PdfNumber)range.Get(PdfName.ST)).IntValue
                : 1;
            // /S names the style; index 1 skips the leading '/' of the name.
            // 'e' marks "no style", which yields an empty label.
            style = range.Contains(PdfName.S)
                ? ((PdfName)range.Get(PdfName.S)).ToString()[1]
                : 'e';
        }

        String label;
        if (style == 'R') {
            label = RomanNumberFactory.GetUpperCaseString(number);
        }
        else if (style == 'r') {
            label = RomanNumberFactory.GetLowerCaseString(number);
        }
        else if (style == 'A') {
            label = RomanAlphabetFactory.GetUpperCaseString(number);
        }
        else if (style == 'a') {
            label = RomanAlphabetFactory.GetLowerCaseString(number);
        }
        else if (style == 'e') {
            label = "";
        }
        else {
            // Any other style (including the initial 'D') prints the plain number.
            label = "" + number;
        }

        result[page] = label;
        number++;
    }
    return result;
}
Related
The vector highlight functionality does not work properly when we use the Beider-Morse analyzer in Lucene.NET. Has anybody come across this issue?
Vector highlighting works fine with the standard analyzer, but it does not work properly with Beider-Morse: it highlights the entire string.
// Snippet from the question: index a document with a custom phonetic analyzer,
// then search and print highlighted fragments. As posted it is not a complete
// compilation unit: several names used below are not declared in the snippet.
Directory directory = FSDirectory.GetDirectory("LuceneIndex");
// Custom analyzer: a KeywordTokenizer feeding a BeiderMorseFilter.
// NOTE(review): KeywordTokenizer presumably emits the whole field value as a
// single token; if so, the highlighter can only mark the entire string, which
// would match the behaviour reported in the question - confirm.
Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
{
var tokenizer = new KeywordTokenizer(input: reader);
PhoneticEngine phoneticEngine = new PhoneticEngine(NameType.GENERIC, RuleType.APPROX, false);
var stream = new BeiderMorseFilter(input: tokenizer, phoneticEngine);
return new TokenStreamComponents(tokenizer, stream);
});
// Indexing: "id" is stored but not indexed; "EmployeeName" is stored and tokenized.
// `i` and `text` are not declared in this snippet.
IndexWriter writer = new IndexWriter(directory, analyzer);
Document doc = new Document();
doc.Add(new Field("id", i.ToString(), Field.Store.YES, Field.Index.NO));
doc.Add(new Field("EmployeeName", text, Field.Store.YES, Field.Index.TOKENIZED));
writer.AddDocument(doc);
writer.Optimize();
writer.Flush();
writer.Close();
// Searching.
// NOTE(review): the parser is given `mAnalyzer`, not the `analyzer` built above - confirm they are the same instance.
QueryParser queryParser = new QueryParser(Lucene.Net.Util.LuceneVersion.LUCENE_48, "EmployeeName", mAnalyzer);
IndexSearcher searcher = new IndexSearcher(directory);
// `query` is not declared in this snippet.
Hits hits = searcher.Search(query);
int results = hits.Length();
Console.WriteLine("Found {0} results", results);
// NOTE(review): the brace opened by this loop is never closed in the snippet,
// and the inner loop below declares a second `i`.
for (int i = 0; i < results; i++)
{
SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter();
// NOTE(review): QueryScorer receives the QueryParser itself rather than a parsed query - confirm.
Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(queryParser));
// Process at most 30 hits. `tTopDocs` is not declared in this snippet.
int totalScoreDocs = tTopDocs.ScoreDocs.Length > 30 ? 30 : tTopDocs.ScoreDocs.Length;
for (int i = 0; i < totalScoreDocs; i++)
{
int id = tTopDocs.ScoreDocs[i].Doc;
Document doc = searcher.Doc(id);
string text = doc.Get("EmployeeName");
// `mIndexSearcher` is not declared in this snippet (the searcher above is named `searcher`).
TokenStream tokenStream = TokenSources.GetAnyTokenStream(mIndexSearcher.IndexReader, id, "EmployeeName", analyzer);
TextFragment[] frag = highlighter.GetBestTextFragments(
tokenStream, text, mergeContiguousFragments: false, maxNumFragments: 10); // highlighter.GetBestFragments(tokenStream, text, 3, "...");
// Print only the fragments that exist and have a positive score.
for (int j = 0; j < frag.Length; j++)
{
if (frag[j] != null && frag[j].Score > 0)
{
Console.WriteLine(frag[j].ToString());
}
}
}
Sample For Lucene highlight
With Standard analyzer;
Input-John
Output-William <B>John</B> Russel
With Beider Morse analyzer;
Input-John
Output-<B>William John Russel</B>
I'm trying to create a PDF from a DataTable in a Web API using ADO.NET. Depending on the filters, sometimes I get very few records and can download the PDF without any problem; sometimes the result is huge, around 200 thousand records. When I test this on my local system, it hangs while converting the DataTable to PDF. My code is below:
/// <summary>
/// Renders a DataTable as a PDF table (one header row with the column names,
/// then one row per data row) and returns it as a downloadable file result
/// named "LogReports.pdf".
/// </summary>
/// <param name="dataTable">The table to export. One empty row is appended to it (see note below).</param>
/// <returns>The PDF as a file result with content type "application/pdf".</returns>
/// <remarks>
/// Exceptions now propagate unchanged. The previous
/// <c>throw new Exception(ex.Message)</c> discarded the original exception
/// type and stack trace.
/// </remarks>
private FileContentResult ExportPDF(DataTable dataTable)
{
    string title = "Logs";
    byte[] content;

    using (System.IO.MemoryStream mStream = new System.IO.MemoryStream())
    {
        // NOTE(review): this appends an all-empty row to the caller's table, so
        // the PDF ends with a blank row and the input is mutated. Kept for
        // backward compatibility - confirm whether it is intentional.
        dataTable.Rows.Add(new object[dataTable.Columns.Count]);

        Document pdfDoc = new Document(PageSize.A2, 10f, 10f, 10f, 0f);
        PdfWriter.GetInstance(pdfDoc, mStream);

        int cols = dataTable.Columns.Count;
        int rows = dataTable.Rows.Count;

        HeaderFooter header = new HeaderFooter(new Phrase(title), false);
        // Remove the border that is set by default
        header.Border = iTextSharp.text.Rectangle.TITLE;
        // Align the text: 0 is left, 1 center and 2 right.
        header.Alignment = Element.ALIGN_CENTER;
        pdfDoc.Header = header;

        pdfDoc.Open();

        iTextSharp.text.Table pdfTable = new iTextSharp.text.Table(cols, rows);
        pdfTable.BorderWidth = 1;
        pdfTable.Width = 100;
        pdfTable.Padding = 1;
        pdfTable.Spacing = 4;

        // The fonts do not depend on the cell, so look them up once instead of
        // once per cell (rows * cols lookups on large tables).
        iTextSharp.text.Font headerFont = FontFactory.GetFont(FontFactory.HELVETICA, 14, iTextSharp.text.Font.BOLD, iTextSharp.text.BaseColor.Black);
        iTextSharp.text.Font rowFont = FontFactory.GetFont(FontFactory.HELVETICA, 12);

        // Table headers: one cell per column name.
        for (int i = 0; i < cols; i++)
        {
            Cell headerCell = new Cell();
            headerCell.Add(new Chunk(dataTable.Columns[i].ColumnName, headerFont));
            pdfTable.AddCell(headerCell);
        }

        // Table data: one cell per value, row by row.
        for (int k = 0; k < rows; k++)
        {
            for (int j = 0; j < cols; j++)
            {
                Cell dataCell = new Cell();
                dataCell.Add(new Chunk(dataTable.Rows[k][j].ToString(), rowFont));
                pdfTable.AddCell(dataCell);
            }
        }

        pdfDoc.Add(pdfTable);
        pdfDoc.Close();

        content = mStream.ToArray();
    }

    return File(content, "application/pdf", "LogReports.pdf");
}
I am trying to change the background color of all images in a PDF using iTextSharp.
How can I loop through all the images and change their background color?
I used the code below to extract the PDF images:
/// <summary>
/// Extracts one image per page from a PDF and saves each as
/// "&lt;pageNumber&gt;.jpg" in <paramref name="outputPath"/>.
/// </summary>
/// <param name="sourcePdf">Path of the PDF to read.</param>
/// <param name="outputPath">Directory that receives the images; created when missing.</param>
/// <remarks>
/// NOTE: only the image returned by FindImageInPDFDictionary is extracted for each page.
/// </remarks>
public static void ExtractImagesFromPDF(string sourcePdf, string outputPath)
{
    PdfReader pdf = new PdfReader(sourcePdf);
    try
    {
        for (int pageNumber = 1; pageNumber <= pdf.NumberOfPages; pageNumber++)
        {
            PdfDictionary pg = pdf.GetPageN(pageNumber);

            // Search pages, forms and groups for an image.
            PdfObject obj = FindImageInPDFDictionary(pg);
            if (obj == null)
            {
                continue;
            }

            // Resolve the indirect reference to the image stream.
            PdfObject pdfObj = PdfReader.GetPdfObject(obj);
            byte[] bytes = PdfReader.GetStreamBytesRaw((PRStream)pdfObj);
            if (bytes == null)
            {
                continue;
            }

            // The image must be saved while its backing stream is still open;
            // both are disposed once the file has been written.
            using (System.IO.MemoryStream memStream = new System.IO.MemoryStream(bytes))
            using (System.Drawing.Image img = System.Drawing.Image.FromStream(memStream))
            {
                // CreateDirectory does nothing when the directory already exists.
                Directory.CreateDirectory(outputPath);

                string path = Path.Combine(outputPath, String.Format(@"{0}.jpg", pageNumber));
                System.Drawing.Imaging.EncoderParameters parms = new System.Drawing.Imaging.EncoderParameters(1);
                parms.Param[0] = new System.Drawing.Imaging.EncoderParameter(System.Drawing.Imaging.Encoder.Compression, 0);
                System.Drawing.Imaging.ImageCodecInfo jpegEncoder = Utilities.GetImageEncoder("JPEG");
                img.Save(path, jpegEncoder, parms);
            }
        }
    }
    finally
    {
        pdf.Close();
    }
}
/// <summary>
/// Searches the XObject resources of a page (or form/group) dictionary for an
/// image, descending into nested forms and groups.
/// </summary>
/// <param name="pg">Dictionary whose /Resources entry is searched.</param>
/// <returns>
/// The indirect reference of the first image found, or <c>null</c> when the
/// dictionary has no resources, no XObjects, or no image.
/// </returns>
private static PdfObject FindImageInPDFDictionary(PdfDictionary pg)
{
    PdfDictionary res =
        (PdfDictionary)PdfReader.GetPdfObject(pg.Get(PdfName.RESOURCES));
    if (res == null)
    {
        // No /Resources entry: nothing to search (previously a NullReferenceException).
        return null;
    }

    PdfDictionary xobj =
        (PdfDictionary)PdfReader.GetPdfObject(res.Get(PdfName.XOBJECT));
    if (xobj == null)
    {
        return null;
    }

    foreach (PdfName name in xobj.Keys)
    {
        PdfObject obj = xobj.Get(name);
        if (!obj.IsIndirect())
        {
            continue;
        }

        PdfDictionary tg = (PdfDictionary)PdfReader.GetPdfObject(obj);
        if (tg == null)
        {
            continue;
        }

        PdfName type =
            (PdfName)PdfReader.GetPdfObject(tg.Get(PdfName.SUBTYPE));

        // Image directly among this dictionary's XObjects.
        if (PdfName.IMAGE.Equals(type))
        {
            return obj;
        }

        // Image inside a form or a group. Keep scanning the remaining XObjects
        // when the nested search finds nothing; returning its null result
        // straight away would skip images listed after it.
        if (PdfName.FORM.Equals(type) || PdfName.GROUP.Equals(type))
        {
            PdfObject nested = FindImageInPDFDictionary(tg);
            if (nested != null)
            {
                return nested;
            }
        }
    }

    return null;
}
Many thanks in advance
I am using iTextSharp to extract images from a PDF file. I am able to extract the images, but they are not rendered correctly (i.e. they look like photographic negatives).
Code:
// Extracts every image XObject referenced directly by page 1 of Test3.pdf and
// saves them as 1.jpg, 2.jpg, ... in imgPath.
string sFilePath = "Test3.pdf";
int pageNum = 1;
PdfReader pdf = new PdfReader(sFilePath);
try
{
    PdfDictionary pg = pdf.GetPageN(pageNum);
    PdfDictionary res = (PdfDictionary)PdfReader.GetPdfObject(pg.Get(PdfName.RESOURCES));
    if (res == null) { return; }
    PdfDictionary xobj = (PdfDictionary)PdfReader.GetPdfObject(res.Get(PdfName.XOBJECT));
    if (xobj == null) { return; }

    // The encoder lookup does not depend on the image, so do it once.
    var jpegEncoder = ImageCodecInfo.GetImageEncoders().ToList().Find(x => x.FormatID == ImageFormat.Jpeg.Guid);

    int imageCount = 0;
    foreach (PdfName name in xobj.Keys)
    {
        PdfObject obj = xobj.Get(name);
        if (!obj.IsIndirect()) { continue; }

        PdfDictionary tg = (PdfDictionary)PdfReader.GetPdfObject(obj);
        PdfName type = (PdfName)PdfReader.GetPdfObject(tg.Get(PdfName.SUBTYPE));
        // Constant on the left so an XObject without /Subtype is skipped
        // instead of throwing a NullReferenceException.
        if (!PdfName.IMAGE.Equals(type)) { continue; }

        // Resolve the indirect reference to the image stream.
        PdfObject pdfObj = PdfReader.GetPdfObject(obj);
        byte[] bytes = PdfReader.GetStreamBytesRaw((PRStream)pdfObj);
        if (bytes == null) { continue; }

        try
        {
            using (System.IO.MemoryStream memStream = new System.IO.MemoryStream(bytes))
            using (System.Drawing.Image img = System.Drawing.Image.FromStream(memStream))
            {
                // CreateDirectory does nothing when the directory already exists.
                Directory.CreateDirectory(imgPath);

                string path = Path.Combine(imgPath, String.Format(@"{0}.jpg", ++imageCount));
                System.Drawing.Imaging.EncoderParameters parms = new System.Drawing.Imaging.EncoderParameters(1);
                parms.Param[0] = new System.Drawing.Imaging.EncoderParameter(System.Drawing.Imaging.Encoder.Compression, 0);
                img.Save(path, jpegEncoder, parms);
            }
        }
        catch (Exception ex)
        {
            // Still best effort (move on to the next image), but report the
            // failure instead of swallowing it silently.
            // NOTE(review): the raw stream bytes are presumably not always a
            // format System.Drawing can decode - confirm.
            Console.Error.WriteLine("Could not extract image {0}: {1}", name, ex.Message);
        }
    }
}
finally
{
    pdf.Close();
}
I have a fillable PDF form in which we dynamically set text field values. After doing this, I need to add a table on the same page of the PDF.
If I add the table, a new PDF is created containing only the table; all the other existing data is cleared.
I am using the code below:
/// <summary>
/// Writes two tables - one built from GetHeaderDataTable(), one from
/// GetDataTable() - into a PDF document written to a fixed file path.
/// Errors are reported through Response.Write.
/// </summary>
private void AddTableToPDF()
{
    Document doc = new Document(iTextSharp.text.PageSize.LETTER, 10, 10, 100, 100);
    FileStream output = null;
    try
    {
        string pdfFilePath = @"D:\Temp\PDF\Inspection Form - Steel Girder.pdf";

        // NOTE(review): FileMode.Append writes a brand-new PDF after the bytes
        // of the existing file; it does not add content to the existing pages.
        // Stamping onto the existing document (e.g. with PdfStamper) is
        // presumably what is wanted - confirm.
        output = new FileStream(pdfFilePath, FileMode.Append);
        PdfWriter.GetInstance(doc, output);
        doc.Open(); // Open document to write

        DataTable dt = GetDataTable();
        DataTable dtHeader = GetHeaderDataTable();

        if (dtHeader != null)
        {
            PdfPTable headerTable = new PdfPTable(dtHeader.Columns.Count);
            for (int rows = 0; rows < dtHeader.Rows.Count; rows++)
            {
                for (int column = 0; column < dtHeader.Columns.Count; column++)
                {
                    headerTable.AddCell(new PdfPCell(new Phrase(new Chunk(dtHeader.Rows[rows][column].ToString(), font8))));
                }
            }
            doc.Add(headerTable); // add pdf table to the document
        }

        if (dt != null)
        {
            PdfPTable dataTable = new PdfPTable(dt.Columns.Count);

            // NOTE(review): three fixed captions are added whatever
            // dt.Columns.Count is; the cells presumably only line up when the
            // table has exactly three columns - confirm.
            dataTable.AddCell(new PdfPCell(new Phrase(new Chunk("Reference", font8))));
            dataTable.AddCell(new PdfPCell(new Phrase(new Chunk("Remark", font8))));
            dataTable.AddCell(new PdfPCell(new Phrase(new Chunk("Description", font8))));

            for (int rows = 0; rows < dt.Rows.Count; rows++)
            {
                for (int column = 0; column < dt.Columns.Count; column++)
                {
                    dataTable.AddCell(new PdfPCell(new Phrase(new Chunk(dt.Rows[rows][column].ToString(), font8))));
                }
            }
            doc.Add(dataTable); // add pdf table to the document
        }
    }
    catch (DocumentException docEx)
    {
        Response.Write(docEx.Message);
    }
    catch (IOException ioEx)
    {
        Response.Write(ioEx.Message);
    }
    catch (Exception ex)
    {
        Response.Write(ex.Message);
    }
    finally
    {
        try
        {
            doc.Close();
        }
        finally
        {
            // Release the file handle even when closing the document throws.
            if (output != null)
            {
                output.Dispose();
            }
        }
    }
}
Suppose your main PDF is named m1.pdf. Create a new PDF named m2.pdf that contains the table you need to add, then merge the two PDFs (m1.pdf and m2.pdf) into a new one ('MergedNew.pdf').
To merge the two PDFs, use the following code; it works fine, I have tested it:
/// <summary>
/// Click handler: merges d:\m1.pdf and d:\m2.pdf into d:\MergedNew.pdf.
/// </summary>
protected void BtnMerge_Click(object sender, EventArgs e)
{
    // Verbatim strings (@"...") keep the backslashes in the paths literal.
    String[] files = @"d:\m1.pdf,d:\m2.pdf".Split(',');
    MergeFiles(@"d:\MergedNew.pdf", files);
}
/// <summary>
/// Merges the pages of several PDFs, in order, into one new A4 document.
/// An existing destination file is deleted first.
/// </summary>
/// <param name="destinationFile">Path of the merged PDF to create.</param>
/// <param name="sourceFiles">
/// Paths of the PDFs to merge. Any number of entries is accepted; null or
/// blank entries are skipped.
/// </param>
/// <remarks>
/// Failures while reading or writing are not rethrown, as before, but they are
/// now reported through <c>System.Diagnostics.Trace</c> instead of being
/// swallowed silently.
/// </remarks>
public void MergeFiles(string destinationFile, string[] sourceFiles)
{
    if (System.IO.File.Exists(destinationFile))
        System.IO.File.Delete(destinationFile);

    // Readers stay open until the document is closed, because the writer may
    // still need them for the imported pages; they are all closed in the finally.
    System.Collections.Generic.List<PdfReader> readers = new System.Collections.Generic.List<PdfReader>();
    try
    {
        if (sourceFiles != null)
        {
            foreach (string sourceFile in sourceFiles)
            {
                // Skip null/blank entries rather than failing on them.
                if (sourceFile != null && sourceFile.Trim() != "")
                    readers.Add(new PdfReader(sourceFile));
            }
        }

        if (readers.Count == 0)
            return; // nothing to merge

        Document document = new Document(PageSize.A4);
        using (FileStream output = new FileStream(destinationFile, FileMode.Create))
        {
            PdfWriter writer = PdfWriter.GetInstance(document, output);
            document.Open();
            PdfContentByte cb = writer.DirectContent;

            foreach (PdfReader source in readers)
            {
                int n = source.NumberOfPages;
                for (int i = 1; i <= n; i++)
                {
                    document.SetPageSize(PageSize.A4);
                    document.NewPage();
                    PdfImportedPage page = writer.GetImportedPage(source, i);
                    int rotation = source.GetPageRotation(i);
                    if (rotation == 90 || rotation == 270)
                    {
                        // Rotated page: apply a rotation matrix and shift by the rotated page height.
                        cb.AddTemplate(page, 0, -1f, 1f, 0, 0, source.GetPageSizeWithRotation(i).Height);
                    }
                    else
                    {
                        cb.AddTemplate(page, 1f, 0, 0, 1f, 0, 0);
                    }
                }
            }

            document.Close();
        }
    }
    catch (Exception ex)
    {
        System.Diagnostics.Trace.TraceError("MergeFiles failed for '{0}': {1}", destinationFile, ex);
    }
    finally
    {
        foreach (PdfReader opened in readers)
            opened.Close();
    }
}
Hope this helps you!