iTextSharp document close is causing an extra blank page - pdf

I am using the PdfPageEventHelper in order to be able to add Header and Footer to each page of my document automatically. As was mentioned in many places I am doing so while overriding the OnEndPage.
On my class I am creating:
PdfDocument
creating a FileStream
getting a PdfWriter vis the the static GetInstance method
setting the specific PdfPageEventHelper class that I've created to the writer.PageEvent
adding the writer to the document
calling the document open
adding some content to the document (one very small table having one row)
calling the close document
Now - at step #8 the OnEndPage is being called, which is great, but somehow it is being called twice!
both times it is called to page number 1 (as I see on runtime in the document parameter)
therefore I am getting 2 pages in my document instead of one, where the second page is empty, and the first page is actually having my header and footer twice (overlapping).
I am using iTextSharp version 5.5.1.0 , and I saw on the source file that in Document.Close method they are calling NewPage function... this is why I am getting to OnEndPage in the 2nd time.
Any suggestions?
class MyPdfWriter
{
public MyPdfWriter()
{
//generate doc, file stream etc.
_document = _document = new PdfDocument();
_document.SetMargins(15, 15, 50, 50);
_document.SetPageSize(PageSize.A4);
_fs = new FileStream("myTest.pdf", FileMode.Create);
_writer = PdfWriter.GetInstance(_document, _fs);
_writer.PageEmpty = false;
_writer.PageEvent = new PdfPage(reportDetails.Header,reportDetails.Footer,reportDetails.LogoImage, reportDetails.ReportFileName);
//open doc
_document.Open();
//add some content
var table = new PdfPTable(1);
table.AddCell("bla bla");
_document.Add(titleTable);
//close doc, stream etc.
if (_document != null && _document.IsOpen())
{
_document.Close();
_document = null;
}
if (_writer != null)
{
_writer.Close();
_writer = null;
}
if (_fs != null)
{
_fs.Close();
_fs.Dispose();
}
}
}
class PdfPage : PdfPageEventHelper
{
public override void OnEndPage(PdfWriter writer, Document document)
{
var footer = new PdfPTable(2);
//Write some cells to footer
//....
//init the width
footer.TotalWidth = (footer.TotalWidth.CompareTo(0f) != 0) ? footer.TotalWidth : document.PageSize.Width;
//write the table with WriteSelectedRows
footer.WriteSelectedRows(0, -1, document.LeftMargin, footer.TotalHeight + 10f, writer.DirectContent);
var Header = new PdfPTable(2);
//Write some cells to Header
//....
//init the width
Header.TotalWidth = (Header.TotalWidth.CompareTo(0f) != 0) ? Header.TotalWidth : document.PageSize.Width;
//write the table with WriteSelectedRows
Header.WriteSelectedRows(0, -1, document.LeftMargin, document.PageSize.Height - 10,
writer.DirectContent);
}
}

Related

How to generate the Thumbnail of the PDF first page in Xamarin Forms

I'm using the Syncfusion PDF viewer for Xamarin Forms to display a collection of PDF files and it seems the conversion from PDF to PNG (to extract the first page (the cover) of the PDF file to show it, to the user, into a carousel) didn't work in Xamarin [see https://www.syncfusion.com/kb/9112/how-to-convert-pdf-to-png]
I wonder if there is a way to convert PDF into PNG on the Xamarin platform or if I should convert it on the server side.
Thank you.
You can export PDF pages to images without using Syncfusion PDF Viewer control by consuming the PdfRenderer , CGPDFDocument, and PdfDocument classes.
Xamarin.Forms.Android:
//initialize PDFRenderer by passing PDF file from location.
PdfRenderer renderer = new PdfRenderer(GetSeekableFileDescriptor());
int pageCount = renderer.PageCount;
for(int i=0;i<pageCount;i++)
{
// Use `openPage` to open a specific page in PDF.
Page page = renderer.OpenPage(i);
//Creates bitmap
Bitmap bmp = Bitmap.CreateBitmap(page.Width, page.Height, Bitmap.Config.Argb8888);
//renderes page as bitmap, to use portion of the page use second and third parameter
page.Render(bmp, null, null, PdfRenderMode.ForDisplay);
//Save the bitmap
SaveImage(bmp);
page.Close();
}
//Method to retrieve PDF file from the location
private ParcelFileDescriptor GetSeekableFileDescriptor()
{
ParcelFileDescriptor fileDescriptor = null;
try
{
string root = Android.OS.Environment.ExternalStorageDirectory.ToString()+ "/Syncfusion/sample.pdf";
fileDescriptor = ParcelFileDescriptor.Open(new Java.IO.File(root),ParcelFileMode.ReadOnly
);
}
catch (FileNotFoundException e)
{
}
return fileDescriptor;
}
Xamarin.Forms.iOS:
public void ConvertToImage(Stream fileStream) //Pass PDF stream
{
MemoryStream stream = new MemoryStream();
// Create memory stream from file stream.
fileStream.CopyTo(stream);
// Create data provider from bytes.
CGDataProvider provider = new CGDataProvider(stream.ToArray());
try
{
//Load a PDF file.
m_pdfDcument = new CGPDFDocument(provider);
}
catch (Exception)
{
}
//Get PDF's page and convert as image.
using (CGPDFPage pdfPage = m_pdfDcument.GetPage(2))
{
//initialise image context.
UIGraphics.BeginImageContext(pdfPage.GetBoxRect(CGPDFBox.Media).Size);
// get current context.
CGContext context = UIGraphics.GetCurrentContext();
context.SetFillColor(1.0f, 1.0f, 1.0f, 1.0f);
// Gets page's bounds.
CGRect bounds = new CGRect(pdfPage.GetBoxRect(CGPDFBox.Media).X, pdfPage.GetBoxRect(CGPDFBox.Media).Y, pdfPage.GetBoxRect(CGPDFBox.Media).Width, pdfPage.GetBoxRect(CGPDFBox.Media).Height);
if (pdfPage != null)
{
context.FillRect(bounds);
context.TranslateCTM(0, bounds.Height);
context.ScaleCTM(1.0f, -1.0f);
context.ConcatCTM(pdfPage.GetDrawingTransform(CGPDFBox.Crop, bounds, 0, true));
context.SetRenderingIntent(CGColorRenderingIntent.Default);
context.InterpolationQuality = CGInterpolationQuality.Default;
// Draw PDF page in the context.
context.DrawPDFPage(pdfPage);
// Get image from current context.
pdfImage = UIGraphics.GetImageFromCurrentImageContext();
UIGraphics.EndImageContext();
}
}
// Get bytes from UIImage object.
using (var imageData = pdfImage.AsPNG())
{
imageBytes = new byte[imageData.Length];
System.Runtime.InteropServices.Marshal.Copy(imageData.Bytes, imageBytes, 0, Convert.ToInt32(imageData.Length));
//return bytes;
}
//Create image from bytes.
imageStream = new MemoryStream(imageBytes);
//Save the image. It is a custom method to save the image
Save("PDFtoImage.png", "image/png", imageStream);
}
Xamarin.Forms.UWP
public async void ConvertToImage(Stream fileStream) //Pass PDF stream
{
StorageFile file = null;
//Creates file picker to choose PDF file.
FileOpenPicker filePicker = new FileOpenPicker();
filePicker.FileTypeFilter.Add(".pdf");
filePicker.ViewMode = PickerViewMode.Thumbnail;
filePicker.SuggestedStartLocation = PickerLocationId.DocumentsLibrary;
filePicker.SettingsIdentifier = "picker1";
filePicker.CommitButtonText = "Open Pdf File";
//Open file picker option
file = await filePicker.PickSingleFileAsync();
// Load selected PDF file from the file picker.
PdfDocument pdfDocument = await PdfDocument.LoadFromFileAsync(file);
if (pdfDocument != null && pdfDocument.PageCount > 0)
{
for (int pageIndex = 0; pageIndex < pdfDocument.PageCount; pageIndex++)
{
//Get page from a PDF file.
var pdfPage = pdfDocument.GetPage((uint)pageIndex);
if (pdfPage != null)
{
//Create temporary folder to store images.
StorageFolder tempFolder = ApplicationData.Current.TemporaryFolder;
//Create image file.
StorageFile destinationFile = await KnownFolders.CameraRoll.CreateFileAsync(Guid.NewGuid().ToString() + ".jpg");
if (destinationFile != null)
{
IRandomAccessStream randomStream = await destinationFile.OpenAsync(FileAccessMode.ReadWrite);
//Crerate PDF rendering options
PdfPageRenderOptions pdfPageRenderOptions = new PdfPageRenderOptions();
pdfPageRenderOptions.DestinationWidth = (uint)(300);
// Render the PDF's page as stream.
await pdfPage.RenderToStreamAsync(randomStream, pdfPageRenderOptions);
await randomStream.FlushAsync();
//Dispose the random stream
randomStream.Dispose();
//Dispose the PDF's page.
pdfPage.Dispose();
}
}
}
}
}
I work for Syncfusion.

How do I make modifications to existing layer(Optional Content Group) in pdf?

I am implementing functionality to allow user to draw figures in pdf. I want to draw all the figures in a single layer, which can be made visible or invisible by the user.I am able to create a new layer in a pdf. I am also able to retrieve that layer.But, I am not able to make modification to layer (PDOptionalContentGroup). I tried converting the PDOptionalContentGroup to PDPage and then making desired changes to PDPPage. I also saved the PDDocument.It only created another layer with the same name as previous one, but the changes were not there.Here is the code that I used:
PDFont font = PDType1Font.HELVETICA;
PDDocument doc = PDDocument.load(src);
PDOptionalContentProperties ocprops = doc.getDocumentCatalog().getOCProperties();
foreach (string groupName in ocprops.getGroupNames())
{
PDOptionalContentGroup group = ocprops.getGroup(groupName);
COSBase cosbase = group.getCOSObject();
PDPage groupPage = new PDPage((COSDictionary)cosbase);
PDPageContentStream cs = new PDPageContentStream(doc, groupPage, true, false);
cs.beginText();
cs.setFont(font, 12);
cs.moveTextPositionByAmount(150, 200);
cs.drawString("Testing added to group:" + groupName);
cs.endText();
cs.close();
doc.save(src);
}
(In a comment the OP indicated that he can only use a 1.8.x version of PDFBox. Thus, the code here is 1.8'ish, tested against PDFBox 1.8.12 for Java.)
In a comment to your question "How to get resource names for optional content group in a pdf?" Tilman Hausherr suggested to use the PDFBox class LayerUtility as template for own solutions.
Thus, as an example how add to an existing OCG this helper method (based on LayerUtility.appendFormAsLayer) shows how to add text to an existing or new OCG. It should be simple to adapt it to adding the content you want to add...
void addTextToLayer(PDDocument document, int pageNumber, String layerName, float x, float y, String text) throws IOException
{
PDDocumentCatalog catalog = document.getDocumentCatalog();
PDOptionalContentProperties ocprops = catalog.getOCProperties();
if (ocprops == null)
{
ocprops = new PDOptionalContentProperties();
catalog.setOCProperties(ocprops);
}
PDOptionalContentGroup layer = null;
if (ocprops.hasGroup(layerName))
{
layer = ocprops.getGroup(layerName);
}
else
{
layer = new PDOptionalContentGroup(layerName);
ocprops.addGroup(layer);
}
PDPage page = (PDPage) document.getDocumentCatalog().getAllPages().get(pageNumber);
PDResources resources = page.findResources();
if (resources == null)
{
resources = new PDResources();
page.setResources(resources);
}
PDPropertyList props = resources.getProperties();
if (props == null)
{
props = new PDPropertyList();
resources.setProperties(props);
}
//Find first free resource name with the pattern "MC<index>"
int index = 0;
PDOptionalContentGroup ocg;
COSName resourceName;
do
{
resourceName = COSName.getPDFName("MC" + index);
ocg = props.getOptionalContentGroup(resourceName);
index++;
} while (ocg != null);
//Put mapping for our new layer/OCG
props.putMapping(resourceName, layer);
PDFont font = PDType1Font.HELVETICA;
PDPageContentStream contentStream = new PDPageContentStream(document, page, true, true, true);
contentStream.beginMarkedContentSequence(COSName.OC, resourceName);
contentStream.beginText();
contentStream.setFont(font, 12);
contentStream.moveTextPositionByAmount(x, y);
contentStream.drawString(text);
contentStream.endText();
contentStream.endMarkedContentSequence();
contentStream.close();
}
(AddContentToOCG helper method addTextToLayer)
You can use it like this
PDDocument document = new PDDocument();
PDPage page = new PDPage();
document.addPage(page);
addTextToLayer(document, 0, "MyLayer", 30, 600, "Text in new layer 'MyLayer'");
addTextToLayer(document, 0, "MyOtherLayer", 230, 550, "Text in new layer 'MyOtherLayer'");
addTextToLayer(document, 0, "MyLayer", 30, 500, "Text in existing layer 'MyLayer'");
addTextToLayer(document, 0, "MyOtherLayer", 230, 450, "Text in existing layer 'MyOtherLayer'");
document.save(new File(RESULT_FOLDER, "TextInOCGs.pdf"));
document.close();
(AddContentToOCG test method testAddContentToNewOrExistingOCG)
to add text to existing or not yet existing OCGs.

Search Text on Image in vaadin

I designed a Document Viewer that displays the PDF pages in image format.
I need to implement search functionality on this image formatted pages. When the user enters a term in the search textfield, the term has to be highlighted wherever it is present in the image. This is the code I have tried.
private VerticalLayout buildMainVerticalLayout()
{
mainVerticalLayout = new VerticalLayout();
SwingController controller = new SwingController();
SwingViewBuilder factory = new SwingViewBuilder(controller);
JPanel viewerComponentPanel = factory.buildViewerPanel();
controller.getDocumentViewController().setAnnotationCallback(
new org.icepdf.ri.common.MyAnnotationCallback(
controller.getDocumentViewController()));
JFrame applicationFrame = new JFrame();
applicationFrame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
applicationFrame.getContentPane().add(viewerComponentPanel);
controller.openDocument("test.pdf");
applicationFrame.pack();
applicationFrame.setVisible(true);
DocumentSearchController searchController =
controller.getDocumentSearchController();
for (String term : terms) {
searchController.addSearchTerm(term, false, false);
}
Document document = controller.getDocument();
ArrayList<WordText> foundWords;
for (int pageIndex = 0; pageIndex < document.getNumberOfPages();
pageIndex++) {
foundWords = searchController.searchPage(pageIndex);
System.out.println("Page " + pageIndex);
if (foundWords != null){
for (WordText wordText : foundWords){
System.out.println(" found hit: " + wordText.toString());
}
}
}
return mainVerticalLayout;
}
But the output showing an error. I have tried with many possibilities. It is just opening the editor, but the PDF isn't loading.
Can anyone help me with this?

Skip adding empty tables to PDF when parsing XHTML using ITextSharp

ITextSharp throws an error when you attempt to create a PdfTable with 0 columns.
I have a requirement to take XHTML that is generated using an XSLT transformation and generate a PDF from it. Currently I am using ITextSharp to do so. The problem that I am having is the XHTML that is generated sometimes contains tables with 0 rows, so when ITextSharp attempts to parse them into a table it throws and error saying there are 0 columns in the table.
The reason it says 0 columns is because ITextSharp sets the number of columns in the table to the maximum of the number of columns in each row, and since there are no rows the max number of columns in any given row is 0.
How do I go about catching these HTML table declarations with 0 rows and stop them from being parsed into PDF elements?
I've found the piece of code that is causing the error is within the HtmlPipeline, so I could copy and paste the implementation into a class extending HtmlPipeline and overriding its methods and then do my logic to check for empty tables there, but that seems sloppy and inefficient.
Is there a way to catch the empty table before it is parsed?
=Solution=
The Tag Processor
public class EmptyTableTagProcessor : Table
{
public override IList<IElement> End(IWorkerContext ctx, Tag tag, IList<IElement> currentContent)
{
if (currentContent.Count > 0)
{
return base.End(ctx, tag, currentContent);
}
return new List<IElement>();
}
}
And using the Tag Processor...
//CSS
var cssResolver = XMLWorkerHelper.GetInstance().GetDefaultCssResolver(true);
//HTML
var fontProvider = new XMLWorkerFontProvider();
var cssAppliers = new CssAppliersImpl(fontProvider);
var tagProcessorFactory = Tags.GetHtmlTagProcessorFactory();
tagProcessorFactory.AddProcessor(new EmptyTableTagProcessor(), new string[] { "table" });
var htmlContext = new HtmlPipelineContext(cssAppliers);
htmlContext.SetTagFactory(tagProcessorFactory);
//PIPELINE
var pipeline =
new CssResolverPipeline(cssResolver,
new HtmlPipeline(htmlContext,
new PdfWriterPipeline(document, pdfWriter)));
//XML WORKER
var xmlWorker = new XMLWorker(pipeline, true);
using (var stringReader = new StringReader(html))
{
xmlParser.Parse(stringReader);
}
This solution removes the empty table tags and still writes the PDF as a part of the pipeline.
You should be able to write your own tag processor that accounts for that scenario by subclassing iTextSharp.tool.xml.html.AbstractTagProcessor. In fact, to make your life even easier you can subclass the already existing more specific iTextSharp.tool.xml.html.table.Table:
public class TableTagProcessor : iTextSharp.tool.xml.html.table.Table {
public override IList<IElement> End(IWorkerContext ctx, Tag tag, IList<IElement> currentContent) {
//See if we've got anything to work with
if (currentContent.Count > 0) {
//If so, let our parent class worry about it
return base.End(ctx, tag, currentContent);
}
//Otherwise return an empty list which should make everyone happy
return new List<IElement>();
}
}
Unfortunately, if you want to use a custom tag processor you can't use the shortcut XMLWorkerHelper class and instead you'll need to parse the HTML into elements and add them to your document. To do that you'll need an instance of iTextSharp.tool.xml.IElementHandler which you can create like:
public class SampleHandler : iTextSharp.tool.xml.IElementHandler {
//Generic list of elements
public List<IElement> elements = new List<IElement>();
//Add the supplied item to the list
public void Add(IWritable w) {
if (w is WritableElement) {
elements.AddRange(((WritableElement)w).Elements());
}
}
}
You can use the above with the following code which includes some sample invalid HTML.
//Hold everything in memory
using (var ms = new MemoryStream()) {
//Create new PDF document
using (var doc = new Document()) {
using (var writer = PdfWriter.GetInstance(doc, ms)) {
doc.Open();
//Sample HTML
string html = "<table><tr><td>Hello</td></tr></table><table></table>";
//Create an instance of our element helper
var XhtmlHelper = new SampleHandler();
//Begin pipeline
var htmlContext = new HtmlPipelineContext(null);
//Get the default tag processor
var tagFactory = iTextSharp.tool.xml.html.Tags.GetHtmlTagProcessorFactory();
//Add an instance of our new processor
tagFactory.AddProcessor(new TableTagProcessor(), new string[] { "table" });
//Bind the above to the HTML context part of the pipeline
htmlContext.SetTagFactory(tagFactory);
//Get the default CSS handler and create some boilerplate pipeline stuff
var cssResolver = XMLWorkerHelper.GetInstance().GetDefaultCssResolver(false);
var pipeline = new CssResolverPipeline(cssResolver, new HtmlPipeline(htmlContext, new ElementHandlerPipeline(XhtmlHelper, null)));//Here's where we add our IElementHandler
//The worker dispatches commands to the pipeline stuff above
var worker = new XMLWorker(pipeline, true);
//Create a parser with the worker listed as the dispatcher
var parser = new XMLParser();
parser.AddListener(worker);
//Finally, parse our HTML directly.
using (TextReader sr = new StringReader(html)) {
parser.Parse(sr);
}
//The above did not touch our document. Instead, all "proper" elements are stored in our helper class XhtmlHelper
foreach (var element in XhtmlHelper.elements) {
//Add these to the main document
doc.Add(element);
}
doc.Close();
}
}
}

how to append one pdf to other pdf file using itextsharp

How to append pages to one pdf file from another pdf file without creating a new pdf using itextsharp. I have metadata attached to one pdf so i just want to add only the other pdf pages,so that first pdf metadata should remain as it is.
Regards
Himvj
Assuming you have 2 pdf files: file1.pdf and file2.pdf that you want to concatenate and save the resulting pdf to file1.pdf (by replacing its contents) you could try the following:
using (var output = new MemoryStream())
{
var document = new Document();
var writer = new PdfCopy(document, output);
document.Open();
foreach (var file in new[] { "file1.pdf", "file2.pdf" })
{
var reader = new PdfReader(file);
int n = reader.NumberOfPages;
PdfImportedPage page;
for (int p = 1; p <= n; p++)
{
page = writer.GetImportedPage(reader, p);
writer.AddPage(page);
}
}
document.Close();
File.WriteAllBytes("file1.pdf", output.ToArray());
}
You can try this it add the whole document with metadata
public static void MergeFiles(string destinationFile, string[] sourceFiles)
{
try
{
//1: Create the MemoryStream for the destination document.
using (MemoryStream ms = new MemoryStream())
{
//2: Create the PdfCopyFields object.
PdfCopyFields copy = new PdfCopyFields(ms);
// - Set the security and other settings for the destination file.
//copy.Writer.SetEncryption(PdfWriter.STRENGTH128BITS, null, "1234", PdfWriter.AllowPrinting | PdfWriter.AllowCopy | PdfWriter.AllowFillIn);
copy.Writer.ViewerPreferences = PdfWriter.PageModeUseOutlines;
// - Create an arraylist to hold bookmarks for later use.
ArrayList outlines = new ArrayList();
int pageOffset = 0;
int f = 0;
//3: Import the documents specified in args[1], args[2], etc...
while (f < sourceFiles.Length)
{
// Grab the file from args[] and open it with PdfReader.
string file = sourceFiles[f];
PdfReader reader = new PdfReader(file);
// Import the pages from the current file.
copy.AddDocument(reader);
// Create an ArrayList of bookmarks in the file being imported.
// ArrayList bookmarkLst = SimpleBookmark.GetBookmark(reader);
// Shift the pages to accomidate any pages that were imported before the current document.
// SimpleBookmark.ShiftPageNumbers(bookmarkLst, pageOffset, null);
// Fill the outlines ArrayList with each bookmark as a HashTable.
// foreach (Hashtable ht in bookmarkLst)
// {
// outlines.Add(ht);
// }
// Set the page offset to the last page imported.
//copy.Writer.SetPageSize(rec);
pageOffset += reader.NumberOfPages;
f++;
}
//4: Put the outlines from all documents under a new "Root" outline and
// set them for destination document
// copy.Writer.Outlines = GetBookmarks("Root", ((Hashtable)outlines[0])["Page"], outlines);
//5: Close the PdfCopyFields object.
copy.Close();
//6: Save the MemoryStream to a file.
MemoryStreamToFile(ms, destinationFile);
}
}
catch (System.Exception e)
{
System.Console.Error.WriteLine(e.Message);
System.Console.Error.WriteLine(e.StackTrace);
System.Console.ReadLine();
}
}
public static void MemoryStreamToFile(MemoryStream MS, string FileName)
{
using (FileStream fs = new FileStream(#FileName, FileMode.Create))
{
byte[] data = MS.ToArray();
fs.Write(data, 0, data.Length);
fs.Close();
}
}