In my C# Windows application I want to read the text and images from ABC.pdf, but it is password protected. Please help me solve this issue.
// Extracts the text of every page of a password-protected PDF and appends it,
// line by line, to label1.
// TODO(review): replace the placeholder with the document's real password.
byte[] pdfPassword = Encoding.ASCII.GetBytes("password");
// Verbatim literal (@"...") so the backslash in the path is not read as an escape.
PdfReader reader = new PdfReader(@"E:\ABC.pdf", pdfPassword);
try
{
    int intPageNum = reader.NumberOfPages;
    // Accumulate the text and assign label1.Text once, instead of rebuilding the
    // whole label string (and updating the control) for every extracted line.
    StringBuilder output = new StringBuilder(label1.Text);
    // Pages are addressed from 1 to NumberOfPages inclusive.
    for (int i = 1; i <= intPageNum; i++)
    {
        string text = PdfTextExtractor.GetTextFromPage(reader, i, new LocationTextExtractionStrategy());
        string[] words = text.Split('\n');
        foreach (string word in words)
        {
            // Round-trips the line through UTF-8, exactly as the original code did.
            string line = Encoding.UTF8.GetString(Encoding.UTF8.GetBytes(word));
            output.Append("\n").Append(line);
        }
    }
    label1.Text = output.ToString();
}
finally
{
    reader.Close();
}
Related
I need to apply an image to all the pages of my PDF, but without flattening it (I want to be able to move it in my PDF reader afterwards).
My code:
// Stamps one randomly placed, randomly rotated copy of x2.wmf on every page of each
// PDF found in basePath and writes the result next to the input as "zResult" + name.
String basePath = "d:\\zPDF\\";
DirectoryInfo d = new DirectoryInfo(basePath);
// GetFiles returns an array built up front, so the zResult files created below are
// not visited by this loop.
// NOTE(review): a second run would also match earlier zResult*.pdf outputs - confirm
// whether they should be filtered out.
FileInfo[] Files = d.GetFiles("*.pdf");
foreach (FileInfo file in Files)
{
    string s = file.Name;
    using (System.IO.Stream inputPdfStream = new FileStream(basePath + s, FileMode.Open, System.IO.FileAccess.Read, FileShare.ReadWrite))
    using (System.IO.Stream inputImageStream2 = new FileStream(basePath + "x2.wmf", FileMode.Open, System.IO.FileAccess.Read, FileShare.Read))
    using (System.IO.Stream outputPdfStream = new FileStream(basePath + "zResult" + s, FileMode.Create, System.IO.FileAccess.Write, FileShare.None))
    {
        var reader = new PdfReader(inputPdfStream);
        var stamper = new PdfStamper(reader, outputPdfStream);
        int numberOfPages = reader.NumberOfPages;
        Image myImage = Image.GetInstance(inputImageStream2);
        const int plusMinus = 25;
        for (int i = 1; i <= numberOfPages; i++)
        {
            Rectangle mediabox = reader.GetPageSize(i);
            int getRight = (int)mediabox.GetRight(0);
            var pdfContentByte = stamper.GetOverContent(i);

            // row1(min, max) generates a random number between those 2 values.
            int nr = getRight - 600;
            float imageX = row1(nr, nr + plusMinus);
            nr = 40;
            float imageY = row1(nr, nr + plusMinus);

            myImage.SetAbsolutePosition(imageX, imageY);
            myImage.RotationDegrees = row1(-35, 35);
            // NOTE(review): AddImage on the over-content presumably draws the image
            // into the page content itself, which would explain why it cannot be
            // moved afterwards - confirm against the iText documentation.
            pdfContentByte.AddImage(myImage);
        }
        stamper.Close();
    }
}
I've tried:
stamper.FormFlattening = false;
stamper.FreeTextFlattening = false;
but with no results. The image is still flattened.
After reading some posts, I think I need to set up my PdfStamper in append mode (useAppendMode()), but I don't know how to do this, or whether this is the right direction.
The vector highlight functionality does not work properly when we use the Beider-Morse analyzer in Lucene.NET. Has anybody come across this issue?
Vector highlighting works fine with the standard analyzer, but it does not work properly with Beider-Morse: it highlights the entire string.
// Indexing: open the "LuceneIndex" directory and build a custom analyzer.
Directory directory = FSDirectory.GetDirectory("LuceneIndex");
// The analyzer chains a KeywordTokenizer into a BeiderMorseFilter.
// NOTE(review): KeywordTokenizer presumably emits the whole field value as a single
// token; if so, the highlighter can only ever mark the entire string - confirm
// against the Lucene.NET documentation.
Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
{
var tokenizer = new KeywordTokenizer(input: reader);
PhoneticEngine phoneticEngine = new PhoneticEngine(NameType.GENERIC, RuleType.APPROX, false);
var stream = new BeiderMorseFilter(input: tokenizer, phoneticEngine);
return new TokenStreamComponents(tokenizer, stream);
});
IndexWriter writer = new IndexWriter(directory, analyzer);
// Add one document: "id" is stored but not indexed, "EmployeeName" is stored and tokenized.
// NOTE(review): `i` and `text` are not declared anywhere before this point in the snippet.
Document doc = new Document();
doc.Add(new Field("id", i.ToString(), Field.Store.YES, Field.Index.NO));
doc.Add(new Field("EmployeeName", text, Field.Store.YES, Field.Index.TOKENIZED));
writer.AddDocument(doc);
writer.Optimize();
writer.Flush();
writer.Close();
// Searching.
// NOTE(review): the parser is given `mAnalyzer`, not the `analyzer` built above, and
// `query` is never assigned in this snippet - confirm both come from elsewhere.
QueryParser queryParser = new QueryParser(Lucene.Net.Util.LuceneVersion.LUCENE_48, "EmployeeName", mAnalyzer);
IndexSearcher searcher = new IndexSearcher(directory);
Hits hits = searcher.Search(query);
int results = hits.Length();
Console.WriteLine("Found {0} results", results);
// NOTE(review): this loop's opening brace is never closed in the snippet, and the
// inner loop below declares a second `i`.
for (int i = 0; i < results; i++)
{
// Highlighting.
// NOTE(review): QueryScorer is constructed from the QueryParser object here rather
// than from a parsed query - confirm this is intended.
SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter();
Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(queryParser));
// Look at no more than the first 30 score docs.
// NOTE(review): `tTopDocs` and `mIndexSearcher` are not declared in this snippet.
int totalScoreDocs = tTopDocs.ScoreDocs.Length > 30 ? 30 : tTopDocs.ScoreDocs.Length;
for (int i = 0; i < totalScoreDocs; i++)
{
int id = tTopDocs.ScoreDocs[i].Doc;
Document doc = searcher.Doc(id);
string text = doc.Get("EmployeeName");
// Re-analyze the stored field with `analyzer` and request up to 10 unmerged fragments.
TokenStream tokenStream = TokenSources.GetAnyTokenStream(mIndexSearcher.IndexReader, id, "EmployeeName", analyzer);
TextFragment[] frag = highlighter.GetBestTextFragments(
tokenStream, text, mergeContiguousFragments: false, maxNumFragments: 10); // highlighter.GetBestFragments(tokenStream, text, 3, "...");
// Print only the fragments that exist and have a positive score.
for (int j = 0; j < frag.Length; j++)
{
if (frag[j] != null && frag[j].Score > 0)
{
Console.WriteLine(frag[j].ToString());
}
}
}
Sample For Lucene highlight
With Standard analyzer;
Input-John
Output-William <B>John</B> Russel
With Beider Morse analyzer;
Input-John
Output-<B>William John Russel</B>
I'm trying to create a PDF from a DataTable in a Web API using ADO.NET. Depending on the filters, sometimes I get very few records and can download the PDF without any problem; sometimes the result is huge, around 200 thousand records. When I test on my local system, it hangs while converting the DataTable to PDF. My code is below:
/// <summary>
/// Renders <paramref name="dataTable"/> as a single PDF table (one header row with
/// the column names, then one row per data row) and returns it as the downloadable
/// file "LogReports.pdf".
/// </summary>
/// <param name="dataTable">
/// Table to export. An empty row is appended to it before rendering, as in the
/// original implementation.
/// </param>
/// <returns>The PDF bytes wrapped in an "application/pdf" file result.</returns>
/// <remarks>
/// Exceptions are no longer wrapped in a new <see cref="Exception"/>; they propagate
/// with their original type and stack trace.
/// </remarks>
private FileContentResult ExportPDF(DataTable dataTable)
{
    string Name = "Logs";
    using (System.IO.MemoryStream mStream = new System.IO.MemoryStream())
    {
        int cols = dataTable.Columns.Count;
        // NOTE(review): this mutates the caller's table by appending an empty row;
        // kept because the original did it - confirm whether it is still needed.
        dataTable.Rows.Add(new object[cols]);
        int rows = dataTable.Rows.Count;

        Document pdfDoc = new Document(PageSize.A2, 10f, 10f, 10f, 0f);
        // GetInstance attaches the writer to pdfDoc; the returned instance is not needed.
        PdfWriter.GetInstance(pdfDoc, mStream);

        HeaderFooter header = new HeaderFooter(new Phrase(Name), false);
        // Remove the border that is set by default
        header.Border = iTextSharp.text.Rectangle.TITLE;
        // Align the text: 0 is left, 1 center and 2 right.
        header.Alignment = Element.ALIGN_CENTER;
        pdfDoc.Header = header;

        pdfDoc.Open();
        iTextSharp.text.Table pdfTable = new iTextSharp.text.Table(cols, rows);
        pdfTable.BorderWidth = 1;
        pdfTable.Width = 100;
        pdfTable.Padding = 1;
        pdfTable.Spacing = 4;

        // The fonts do not depend on the cell, so look them up once instead of once
        // per cell (rows * cols lookups on large tables).
        iTextSharp.text.Font ColFont = FontFactory.GetFont(FontFactory.HELVETICA, 14, iTextSharp.text.Font.BOLD, iTextSharp.text.BaseColor.Black);
        iTextSharp.text.Font RowFont = FontFactory.GetFont(FontFactory.HELVETICA, 12);

        // creating table headers
        for (int i = 0; i < cols; i++)
        {
            Cell cellCols = new Cell();
            cellCols.Add(new Chunk(dataTable.Columns[i].ColumnName, ColFont));
            pdfTable.AddCell(cellCols);
        }

        // creating table data (actual result)
        for (int k = 0; k < rows; k++)
        {
            DataRow row = dataTable.Rows[k];
            for (int j = 0; j < cols; j++)
            {
                Cell cellRows = new Cell();
                cellRows.Add(new Chunk(row[j].ToString(), RowFont));
                pdfTable.AddCell(cellRows);
            }
        }

        pdfDoc.Add(pdfTable);
        pdfDoc.Close();

        byte[] content = mStream.ToArray();
        return File(content, "application/pdf", "LogReports.pdf");
    }
}
In Adobe Reader, the first pages of an ebook can have Roman-numeral page numbers, as shown in the attached image below.
Image : http://i.stack.imgur.com/GSm0Q.jpg
I would like to read out these page numbers (not the indexed page numbers) with iText, but I don't know which properties (labels, annotations, ...) I should use. I can already open the file with PdfReader and loop through all the pages, but I have no idea what I should access to get these Roman numerals.
// Opens the PDF and visits every page number from 1 to NumberOfPages.
// The file is opened with FileAccess.Read: FileStream(path, FileMode.Open) alone
// requests read/write access and therefore fails on read-only files.
using (Stream pdfStream = new FileStream(sourceFileName, FileMode.Open, FileAccess.Read, FileShare.Read))
{
    PdfReader pdfReader = new PdfReader(pdfStream);
    try
    {
        int pageCount = pdfReader.NumberOfPages;
        for (int index = 1; index <= pageCount; index++)
        {
            // Per-page work goes here.
        }
    }
    finally
    {
        pdfReader.Close();
    }
}
Thanks.
You are looking for the PageLabelExample. In this example, we have a PDF, page_labels.pdf that has pages numbered like this:
In the listPageLabels() method, we create a txt file with all the page labels:
/**
 * Writes the page label of every page of a PDF to a text file, one label per line.
 * Nothing is written when the PDF defines no page labels.
 *
 * @param src  path of the existing PDF
 * @param dest path of the text file to create
 * @throws IOException if the PDF cannot be read or the text file cannot be created
 */
public void listPageLabels(String src, String dest) throws IOException {
    // no PDF, just a text file
    PrintStream out = new PrintStream(new FileOutputStream(dest));
    try {
        PdfReader reader = new PdfReader(src);
        try {
            String[] labels = PdfPageLabels.getPageLabels(reader);
            // getPageLabels may return null when the PDF has no /PageLabels entry.
            if (labels != null) {
                for (String label : labels) {
                    out.println(label);
                }
            }
            out.flush();
        } finally {
            // Release the reader even if reading the labels fails.
            reader.close();
        }
    } finally {
        out.close();
    }
}
The result looks like this:
A
B
1
2
3
Movies-4
Movies-5
Movies-6
Movies-7
Movies-8
If you want an iTextSharp example, take a look at this method:
/// <summary>
/// Reads the page labels from an existing PDF.
/// </summary>
/// <param name="src">The existing PDF, as raw bytes.</param>
/// <returns>
/// The page labels, each followed by a line break, or an empty string when
/// <c>PdfPageLabels.GetPageLabels</c> returns null.
/// </returns>
public string ListPageLabels(byte[] src) {
    PdfReader reader = new PdfReader(src);
    try {
        String[] labels = PdfPageLabels.GetPageLabels(reader);
        // Guard against a null result instead of failing on labels.Length.
        if (labels == null) {
            return string.Empty;
        }
        StringBuilder sb = new StringBuilder();
        foreach (string label in labels) {
            sb.AppendLine(label);
        }
        return sb.ToString();
    }
    finally {
        reader.Close();
    }
}
Update
As promised in the comment section: PdfPageLabels.cs
I am not a C# developer, but this is a quick and dirty version of the GetPageLabels() method that doesn't add a prefix:
/// <summary>
/// Builds the label string of every page from the catalog's PAGELABELS number tree,
/// without adding any prefix.
/// </summary>
/// <param name="reader">Reader of the PDF whose labels are wanted.</param>
/// <returns>
/// One label per page, indexed from 0, or null when the catalog has no PAGELABELS
/// entry.
/// </returns>
public static String[] GetPageLabels(PdfReader reader) {
    int pageTotal = reader.NumberOfPages;
    PdfDictionary catalog = reader.Catalog;
    PdfDictionary pageLabels = (PdfDictionary)PdfReader.GetPdfObjectRelease(catalog.Get(PdfName.PAGELABELS));
    if (pageLabels == null)
        return null;

    String[] result = new String[pageTotal];
    Dictionary<int, PdfObject> ranges = PdfNumberTree.ReadTree(pageLabels);

    // Numbering state carried from page to page until a tree entry overrides it.
    int number = 1;
    char style = 'D';

    for (int page = 0; page < pageTotal; page++) {
        PdfObject rangeEntry;
        if (ranges.TryGetValue(page, out rangeEntry)) {
            // A tree entry at this page index restarts the numbering and style.
            PdfDictionary range = (PdfDictionary)PdfReader.GetPdfObjectRelease(rangeEntry);
            number = range.Contains(PdfName.ST)
                ? ((PdfNumber)range.Get(PdfName.ST)).IntValue
                : 1;
            // Character at index 1 of the S name's string form; 'e' (empty label)
            // when the entry has no S key.
            style = range.Contains(PdfName.S)
                ? ((PdfName)range.Get(PdfName.S)).ToString()[1]
                : 'e';
        }

        String label;
        switch (style) {
            case 'R':
                label = RomanNumberFactory.GetUpperCaseString(number);
                break;
            case 'r':
                label = RomanNumberFactory.GetLowerCaseString(number);
                break;
            case 'A':
                label = RomanAlphabetFactory.GetUpperCaseString(number);
                break;
            case 'a':
                label = RomanAlphabetFactory.GetLowerCaseString(number);
                break;
            case 'e':
                label = "";
                break;
            default:
                label = "" + number;
                break;
        }
        result[page] = label;
        number++;
    }
    return result;
}
I want to split a file (a docx file) and encode each fragment in a QR code, such that reading the QR codes in sequence reproduces the original file.
I was able to split the file and create a set of QR codes, but when I attempted to recreate the file, the decoder threw the following error message:
"Invalid number of finder pattern detected"
I am using http://www.codeproject.com/KB/cs/qrcode.aspx library.
My encoder code
/// <summary>
/// Splits the encoded bytes of <paramref name="content"/> into consecutive chunks of
/// at most <paramref name="QRCodeCapacity"/> bytes and renders each chunk as one
/// QR code bitmap.
/// </summary>
/// <param name="content">Text whose encoded bytes are spread across the QR codes.</param>
/// <param name="encoding">Encoding used to turn <paramref name="content"/> into bytes.</param>
/// <param name="QRCodeCapacity">Maximum number of bytes placed in one QR code.</param>
/// <param name="qrCodeBackgroundColor">Fill colour of the bitmap.</param>
/// <param name="qrCodeForegroundColor">Colour of the modules that are set in the matrix.</param>
/// <param name="qrCodeScale">Edge length, in pixels, of one module.</param>
/// <param name="NoOfQRcodes">Number of bitmaps to produce.</param>
/// <returns>The bitmaps, in chunk order.</returns>
private List<Bitmap> Encode(String content, Encoding encoding, int QRCodeCapacity,
    System.Drawing.Color qrCodeBackgroundColor, System.Drawing.Color qrCodeForegroundColor,
    int qrCodeScale, int NoOfQRcodes)
{
    List<Bitmap> _qrcodesImages = new List<Bitmap>();
    // NOTE(review): the payload arrives as a string; if it was produced from binary
    // file data, the text encoding round trip may not preserve the bytes - confirm
    // at the caller.
    byte[] _filebytearray = encoding.GetBytes(content);

    // Brushes are GDI+ resources: create them once and dispose them when done.
    using (SolidBrush backgroundBrush = new SolidBrush(qrCodeBackgroundColor))
    using (SolidBrush foregroundBrush = new SolidBrush(qrCodeForegroundColor))
    {
        for (int k = 0, l = 0; k < NoOfQRcodes; k++)
        {
            // Copy the next chunk directly; past the end of the data the chunk is
            // empty, exactly as Skip(l).Take(QRCodeCapacity) produced.
            int chunkLength = Math.Max(0, Math.Min(QRCodeCapacity, _filebytearray.Length - l));
            byte[] _tempByteArray = new byte[chunkLength];
            Array.Copy(_filebytearray, l, _tempByteArray, 0, chunkLength);

            bool[][] matrix = calQrcode(_tempByteArray);
            int side = (matrix.Length * qrCodeScale) + 1;
            Bitmap image = new Bitmap(side, side);
            using (Graphics g = Graphics.FromImage(image))
            {
                g.FillRectangle(backgroundBrush, new Rectangle(0, 0, image.Width, image.Height));
                // matrix is indexed [column][row]: j is the x position, i the y position.
                for (int i = 0; i < matrix.Length; i++)
                {
                    for (int j = 0; j < matrix.Length; j++)
                    {
                        if (matrix[j][i])
                        {
                            g.FillRectangle(foregroundBrush, j * qrCodeScale, i * qrCodeScale, qrCodeScale, qrCodeScale);
                        }
                    }
                }
            }
            _qrcodesImages.Add(image);
            l += QRCodeCapacity;
        }
    }
    return _qrcodesImages;
}