Merging PDFs with ITextSharp - pdf

What is the optimum way to merge 2 PDF files with ITextSharp in C#? I'm using ASP.NET/.NET3.5.

public static void Merge(List<String> InFiles, String OutFile)
{
using (FileStream stream = new FileStream(OutFile, FileMode.Create))
using (Document doc = new Document())
using (PdfCopy pdf = new PdfCopy(doc, stream))
{
doc.Open();
PdfReader reader = null;
PdfImportedPage page = null;
//fixed typo
InFiles.ForEach(file =>
{
reader = new PdfReader(file);
for (int i = 0; i < reader.NumberOfPages; i++)
{
page = pdf.GetImportedPage(reader, i + 1);
pdf.AddPage(page);
}
pdf.FreeReader(reader);
reader.Close();
});
}
}

The last answer works if you don't want to delete the original files. In my case, I want to delete and when I tried I got exception. My solution is:
public static bool MergePDFs(List<String> InFiles, String OutFile)
{
bool merged = true;
try
{
List<PdfReader> readerList = new List<PdfReader>();
foreach (string filePath in InFiles)
{
PdfReader pdfReader = new PdfReader(filePath);
readerList.Add(pdfReader);
}
//Define a new output document and its size, type
Document document = new Document(PageSize.A4, 0, 0, 0, 0);
//Create blank output pdf file and get the stream to write on it.
PdfWriter writer = PdfWriter.GetInstance(document, new FileStream(OutFile, FileMode.Create));
document.Open();
foreach (PdfReader reader in readerList)
{
PdfReader.unethicalreading = true;
for (int i = 1; i <= reader.NumberOfPages; i++)
{
PdfImportedPage page = writer.GetImportedPage(reader, i);
document.Add(iTextSharp.text.Image.GetInstance(page));
}
}
document.Close();
foreach (PdfReader reader in readerList)
{
reader.Close();
}
}
catch (Exception ex)
{
merged = false;
}
return merged;
}
I copied the code from Original Code

Related

Reading attachment from a secured PDF

I am working on a PDF file, which is a secured one and an excel is attached in the PDF file.
The following is the code i tried.
static void Main(string[] args)
{
Program pgm = new Program();
pgm.EmbedAttachments();
//pgm.ExtractAttachments(pgm.pdfFile);
}
private void ExtractAttachments(string _pdfFile)
{
try
{
if (!Directory.Exists(attExtPath))
Directory.CreateDirectory(attExtPath);
byte[] password = System.Text.ASCIIEncoding.ASCII.GetBytes("TFAER13052016");
//byte[] password = System.Text.ASCIIEncoding.ASCII.GetBytes("Password");
PdfDictionary documentNames = null;
PdfDictionary embeddedFiles = null;
PdfDictionary fileArray = null;
PdfDictionary file = null;
PRStream stream = null;
//PdfReader reader = new PdfReader(_pdfFile);
PdfReader reader = new PdfReader(_pdfFile, password);
PdfDictionary catalog = reader.Catalog;
documentNames = (PdfDictionary)PdfReader.GetPdfObject(catalog.Get(PdfName.NAMES));
if (documentNames != null)
{
embeddedFiles = (PdfDictionary)PdfReader.GetPdfObject(documentNames.Get(PdfName.EMBEDDEDFILES));
if (embeddedFiles != null)
{
PdfArray filespecs = embeddedFiles.GetAsArray(PdfName.NAMES);
for (int i = 0; i < filespecs.Size; i++)
{
i++;
fileArray = filespecs.GetAsDict(i);
file = fileArray.GetAsDict(PdfName.EF);
foreach (PdfName key in file.Keys)
{
stream = (PRStream)PdfReader.GetPdfObject(file.GetAsIndirectObject(key));
string attachedFileName = fileArray.GetAsString(key).ToString();
byte[] attachedFileBytes = PdfReader.GetStreamBytes(stream);
System.IO.File.WriteAllBytes(attExtPath + attachedFileName, attachedFileBytes);
}
}
}
else
throw new Exception("Unable to Read the attachment or There may be no Attachment");
}
else
{
throw new Exception("Unable to Read the document");
}
}
catch (Exception ex)
{
Console.WriteLine(ex.ToString());
Console.ReadKey();
}
}
private void EmbedAttachments()
{
try
{
if (File.Exists(pdfFile))
File.Delete(pdfFile);
Document PDFD = new Document(PageSize.LETTER);
PdfWriter writer;
writer = PdfWriter.GetInstance(PDFD, new FileStream(pdfFile, FileMode.Create));
PDFD.Open();
PDFD.NewPage();
PDFD.Add(new Paragraph("This is test"));
PdfFileSpecification pfs = PdfFileSpecification.FileEmbedded(writer, #"C:\PDFReader\1.xls", "11.xls", null);
//PdfFileSpecification pfs = PdfFileSpecification.FileEmbedded(writer, attFile, "11", File.ReadAllBytes(attFile), true);
writer.AddFileAttachment(pfs);
//writer.AddAnnotation(PdfAnnotation.CreateFileAttachment(writer, new iTextSharp.text.Rectangle(100, 100, 100, 100), "File Attachment", PdfFileSpecification.FileExtern(writer, "C:\\test.xml")));
//writer.Close();
PDFD.Close();
Program pgm=new Program();
using (Stream input = new FileStream(pgm.pdfFile, FileMode.Open, FileAccess.Read, FileShare.Read))
{
using (Stream output = new FileStream(pgm.epdfFile, FileMode.Create, FileAccess.Write, FileShare.None))
{
PdfReader reader = new PdfReader(input);
PdfEncryptor.Encrypt(reader, output, true, "Password", "secret", PdfWriter.ALLOW_SCREENREADERS);
}
}
}
catch (Exception ex)
{
Console.WriteLine(ex.StackTrace.ToString());
Console.ReadKey();
}
}
}
The above code contains the creation of a encrypted PDF with an excel attachment and also to extract the same.
Now the real problem is with the file which I already have as a requirement document(I cannot share the file) which also has an excel attachment like my example.
But the above code works for the secured PDF which i have created but not for the actual Secured PDF.
While debugging, I found that the Issue is with the following code
documentNames = (PdfDictionary)PdfReader.GetPdfObject(catalog.Get(PdfName.NAMES));
In which,
catalog.Get(PdfName.NAMES)
is returned as NULL, Where as the File created by me, provides the expected output.
Please guide me on the above.
TIA.
As mkl suggested, It has been attached as an Annotated attachment. But the reference which is used in the example is provided ZipFile Method is no longer supported. Hence I found an alternate code attached below.
public void ExtractAttachments(byte[] src)
{
PRStream stream = null;
string attExtPath = #"C:\PDFReader\Extract\";
if (!Directory.Exists(attExtPath))
Directory.CreateDirectory(attExtPath);
byte[] password = System.Text.ASCIIEncoding.ASCII.GetBytes("TFAER13052016");
PdfReader reader = new PdfReader(src, password);
for (int i = 1; i <= reader.NumberOfPages; i++)
{
PdfArray array = reader.GetPageN(i).GetAsArray(PdfName.ANNOTS);
if (array == null) continue;
for (int j = 0; j < array.Size; j++)
{
PdfDictionary annot = array.GetAsDict(j);
if (PdfName.FILEATTACHMENT.Equals(
annot.GetAsName(PdfName.SUBTYPE)))
{
PdfDictionary fs = annot.GetAsDict(PdfName.FS);
PdfDictionary refs = fs.GetAsDict(PdfName.EF);
foreach (PdfName name in refs.Keys)
{
//zip.AddEntry(
// fs.GetAsString(name).ToString(),
// PdfReader.GetStreamBytes((PRStream)refs.GetAsStream(name))
//);
stream = (PRStream)PdfReader.GetPdfObject(refs.GetAsIndirectObject(name));
string attachedFileName = fs.GetAsString(name).ToString();
var splitname = attachedFileName.Split('\\');
if (splitname.Length != 1)
attachedFileName = splitname[splitname.Length - 1].ToString();
byte[] attachedFileBytes = PdfReader.GetStreamBytes(stream);
System.IO.File.WriteAllBytes(attExtPath + attachedFileName, attachedFileBytes);
}
}
}
}
}
Please Let me Know if it can be achieved in any other way.
Thanks!!!

Merging PDFs using iTextSharp removes Trim Box Detail

I am trying to use iTextSharp to merge 2 or more PDF files. However I am not getting any details about the TrimBox. Performing the code below on the PDF (which was merged) always return NULL
Rectangle rect = reader.GetBoxSize(1, "trim");
This is the code for merging.
public void Merge(List<String> InFiles, String OutFile)
{
using (FileStream stream = new FileStream(OutFile, FileMode.Create))
using (Document doc = new Document())
using (PdfCopy pdf = new PdfCopy(doc, stream))
{
doc.Open();
PdfReader reader = null;
PdfImportedPage page = null;
InFiles.ForEach(file =>
{
reader = new PdfReader(file);
for (int i = 0; i < reader.NumberOfPages; i++)
{
page = pdf.GetImportedPage(reader, i + 1);
pdf.AddPage(page);
}
pdf.FreeReader(reader);
reader.Close();
});
}
}
How to keep I keep the box information after the merge?
-Alan-
Here is the code I created to merge Portrait and Landscape docs using iTextSharp. It works rather well.
public void MergeFiles(System.Collections.Generic.List<string> sourceFiles, string destinationFile)
{
Document document=null;
if (System.IO.File.Exists(destinationFile))
System.IO.File.Delete(destinationFile);
try
{
PdfCopy writer = null;
int numberOfPages=0;
foreach(string sourceFile in sourceFiles)
{
PdfReader reader = new PdfReader(sourceFile);
reader.ConsolidateNamedDestinations();
numberOfPages = reader.NumberOfPages;
if(document==null)
{
document = new Document(reader.GetPageSizeWithRotation(1));
writer = new PdfCopy(document, new FileStream(destinationFile, FileMode.Create));
document.Open();
}
for (int x = 1;x <= numberOfPages;x++ )
{
if (writer != null)
{
PdfImportedPage page = writer.GetImportedPage(reader, x);
writer.AddPage(page);
}
}
PRAcroForm form = reader.AcroForm;
if (form != null && writer != null)
writer.CopyAcroForm(reader);
}
}
finally
{
if (document != null && document.IsOpen())
document.Close();
}
}

Highlight keywords in a pdf using itextsharp and render it to the browser

I have an existing pdf .I am using itextSharp to open the document and highlight keywords dynamically and when I save this into a file it works fine, but when I write it into a memory Stream and try to render it on the browser the highlights are not there.
Here is the code
public void SearchPDF()
{
//Create a new file from our test file with highlighting
string highLightFile = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.Desktop), "Highlighted.pdf");
// Stream
//Bind a reader and stamper to our test PDF
var testFile = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.Desktop), "201400699428__1_00000.pdf");
PdfReader reader = new PdfReader(testFile);
var numberOfPages = reader.NumberOfPages;
System.Globalization.CompareOptions cmp = System.Globalization.CompareOptions.None;
//Create an instance of our strategy
MemoryStream m = new MemoryStream();
//using (var fs = new FileStream(highLightFile, FileMode.Create, FileAccess.Write, FileShare.None))
//{
using (Document document = new Document(PageSize.A4))
{
PdfWriter.GetInstance(document, m);
using (PdfStamper stamper = new PdfStamper(reader, m))
{
//document.Open();
for (var currentPageIndex = 1; currentPageIndex <= numberOfPages; currentPageIndex++)
{
MyLocationTextExtractionStrategy strategyTest = new MyLocationTextExtractionStrategy("Ritual Bath", cmp);
var listener = new MyRenderListener();
ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();
//Parse page 1 of the document above
using (var r = new PdfReader(testFile))
{
var ex = PdfTextExtractor.GetTextFromPage(r, currentPageIndex, strategyTest);
}
//Loop through each chunk found
foreach (var p in strategyTest.myPoints)
{
//Console.WriteLine(string.Format("Found text {0} at {1}x{2}", p.Text, p.Rect.Left, p.Rect.Bottom));
float[] quad = { p.Rect.Left, p.Rect.Bottom, p.Rect.Right, p.Rect.Bottom, p.Rect.Left, p.Rect.Top, p.Rect.Right, p.Rect.Top };
Rectangle rect = new Rectangle(p.Rect.Left,
p.Rect.Top,
p.Rect.Bottom,
p.Rect.Right);
PdfAnnotation highlight = PdfAnnotation.CreateMarkup(stamper.Writer, rect, null, PdfAnnotation.MARKUP_HIGHLIGHT, quad);
//Set the color
highlight.Color = BaseColor.YELLOW;
//Add the annotation
stamper.AddAnnotation(highlight, 1);
}
}
}
}
//}
//Response.ContentType = "application/pdf";
//Response.AddHeader("content-disposition", "attachment; filename=download_report.pdf");
//Response.BinaryWrite(byteContent);
//Response.End();
HttpContext.Current.Response.Buffer = false;
HttpContext.Current.Response.Clear();
HttpContext.Current.Response.ClearContent();
HttpContext.Current.Response.ClearHeaders();
HttpContext.Current.Response.ContentType = "application/pdf";
HttpContext.Current.Response.AppendHeader("Content-Disposition", "inline;filename=download_report.pdf");
HttpContext.Current.Response.AppendHeader("Content-Length", m.ToArray().Length.ToString());
HttpContext.Current.Response.OutputStream.Write(m.ToArray(), 0, m.ToArray().Length);
HttpContext.Current.Response.OutputStream.Flush();
HttpContext.Current.Response.OutputStream.Close();
HttpContext.Current.Response.End();
}

I can't flatten merged PDF with itextsharp

I have 3 pdfs templates that i Fill out and save to a memorystream them merge them together into 1 pdf into a memorystream. The resulting merged pdf is non flattenned. I tried reading the merged pdf and flattening it but it say "pdf header signature not found". Is there a way to flattened merged pdf from a memorystream.
this is what I tried so far without success.
var mergedPdf = MergePDFs(AllPdfs);
MemoryStream flattenedMergedPdf = new MemoryStream();
PdfReader Finalpdf = new PdfReader(mergedPdf);
PdfStamper StampFinalMerged = new PdfStamper(Finalpdf,flattenedMergedPdf);
StampFinalMerged.FormFlattening=true;
return flattenedMergedPdf;
the method i use to merge the pdfs(found on SO) is below:
public MemoryStream MergePDFs(List<byte[]> pdfFiles)
{
if (pdfFiles.Count > 1)
{
PdfReader finalPdf;
Document pdfContainer;
PdfCopy pdfCopy;
MemoryStream msFinalPdf = new MemoryStream();
finalPdf = new PdfReader(pdfFiles[0]);
pdfContainer = new Document();
pdfCopy = new PdfSmartCopy(pdfContainer, msFinalPdf);
pdfContainer.Open();
for (int k = 0; k < pdfFiles.Count; k++)
{
finalPdf = new PdfReader(pdfFiles[k]);
for (int i = 1; i < finalPdf.NumberOfPages + 1; i++)
{
((PdfSmartCopy)pdfCopy).AddPage(pdfCopy.GetImportedPage(finalPdf, i));
}
pdfCopy.FreeReader(finalPdf);
}
//tried this here but no success
// PdfReader FinalRead = new PdfReader(finalPdf);
//MemoryStream FinalMS = new MemoryStream();
//var finalStamper = new PdfStamper(FinalRead, FinalMS);
//finalStamper.FormFlattening = true;
pdfCopy.CloseStream = false;
pdfCopy.Close();
finalPdf.Close();
//return FinalMS
return msFinalPdf;
}
Any help will be much appreciated. Thanks
You can try another approach to flatten PDF templates first then merge them into a final PDF file:
string newFile = "... path to the merged pdf file ...";
string[] pdfTemplates = new string[3];
string pdfTemplates[0] = "... path to the 1st pdf template ...";
string pdfTemplates[1] = "... path to the 2nd pdf template ...";
string pdfTemplates[2] = "... path to the 3rd pdf template ...";
string[] pdfFiles = new string[3];
string pdfFiles[0] = "... path to the 1st temp pdf file ...";
string pdfFiles[1] = "... path to the 2nd temp pdf file ...";
string pdfFiles[2] = "... path to the 3rd temp pdf file ...";
for (int i = 0; i < 3; i++)
{
PdfReader pdfReader = new PdfReader(pdfTemplates[i]);
PdfStamper pdfStamper = new PdfStamper(pdfReader, new FileStream(pdfFiles[i], FileMode.Create));
pdfStamper.FormFlattening = true;
pdfStamper.Close();
}
MergeFiles(newFile, pdfFiles);
for (int i = 0; i < 3; i++)
{
FileInfo fi = new FileInfo(pdfFiles[i]);
fi.Delete();
}
private void MergeFiles(string destinationFile, string[] sourceFiles)
{
try
{
int f = 0;
PdfReader reader = new PdfReader(sourceFiles[f]);
int n = reader.NumberOfPages;
Document document = new Document(reader.GetPageSizeWithRotation(1));
PdfWriter writer = PdfWriter.GetInstance(document, new FileStream(destinationFile, FileMode.Create));
document.Open();
PdfContentByte cb = writer.DirectContent;
PdfImportedPage page;
int rotation;
while (f < sourceFiles.Length)
{
int i = 0;
while (i < n)
{
i++;
document.SetPageSize(reader.GetPageSizeWithRotation(i));
document.NewPage();
page = writer.GetImportedPage(reader, i);
rotation = reader.GetPageRotation(i);
if (rotation == 90 || rotation == 270)
{
cb.AddTemplate(page, 0, -1f, 1f, 0, 0, reader.GetPageSizeWithRotation(i).Height);
}
else
{
cb.AddTemplate(page, 1f, 0, 0, 1f, 0, 0);
}
}
f++;
if (f < sourceFiles.Length)
{
reader = new PdfReader(sourceFiles[f]);
n = reader.NumberOfPages;
}
}
document.Close();
}
catch (Exception ex)
{
MessageBox.Show(ex.Message, "Error", MessageBoxButtons.OK, MessageBoxIcon.Error);
}
}

iTextSharp merge and preserve plugins

I have a PDF that has a plugin control on one page. I need to merge it with another pdf but when I do, the plugin is redendered as a blank rectangle. Is there a way to merge and preserve the plugin control? If important, the plugin is a Dessault Systemes 3D Via Composer Player control as installed from http://www.3ds.com/products/3dvia/3dvia-composer/resource-center/
The code I'm using is simple writer getinstance, contentbyte directcontent, getimportedpage, addtemplate. Nothing fancy.
FileStream docStream = new FileStream(#"C:\Temp\Merged.pdf", FileMode.Create);
Document newDocument = new Document(PageSize.A4.Rotate());
PdfWriter pdfWriter = PdfWriter.GetInstance(newDocument, docStream);
try
{
newDocument.Open();
PdfContentByte pdfContentByte = pdfWriter.DirectContent;
newDocument.NewPage();
PdfReader mainPage = new PdfReader(#"C:\Temp\PageWithPlugin.pdf");
PdfImportedPage importedPage1 = pdfWriter.GetImportedPage(mainPage, 1);
pdfContentByte.AddTemplate(importedPage1, 0, 0);
mainPage.Close();
PdfReader smgPages = new PdfReader(#"C:\Temp\MorePages.pdf");
for (int page = 1; page <= smgPage.NumberOfPages; page++)
{
newDocument.NewPage();
PdfImportedPage importedPage = pdfWriter.GetImportedPage(smgPages, page);
pdfContentByte.AddTemplate(importedPage, 0, 0);
}
smgPages.Close();
}
finally
{
docStream.Flush();
if (newDocument != null)
newDocument.Close();
docStream.Close();
}
Give this a try.
List<byte[]> fileList = new List<byte[]>();
using (FileStream fileSteam = File.OpenRead((#"C:\Temp\PageWithPlugin.pdf")))
{
Byte[] byteArray = new byte[fileSteam.Length];
fileSteam.Read(byteArray, 0, byteArray.Length);
fileList.Add(byteArray);
}
using (FileStream fileSteam = File.OpenRead((#"C:\Temp\MorePages.pdf")))
{
Byte[] byteArray = new byte[fileSteam.Length];
fileSteam.Read(byteArray, 0, byteArray.Length);
fileList.Add(byteArray);
}
using(MemoryStream msOutput = new MemoryStream())
{
PdfReader pdfFile = new PdfReader(fileList[0]);
Document doc = new Document();
PdfWriter pCopy = new PdfSmartCopy(doc, msOutput);
doc.Open();
for (int k = 0; k < fileList.Count; k++)
{
for (int i = 1; i < pdfFile.NumberOfPages + 1; i++)
{
pdfFile = new PdfReader(fileList[k]);
((PdfSmartCopy)pCopy).AddPage(pCopy.GetImportedPage(pdfFile, i));
pCopy.FreeReader(pdfFile);
}
}
pdfFile.Close();
pCopy.Close();
doc.Close();
fileList.Clear();
byte[] form = msOutput.ToArray();
using (FileStream fileSteam = new FileStream(#"C:\Temp\Merged.pdf", FileMode.Create))
{
fileStream.Write(form, 0, form.Length);
}
}
Not to look a gift horse in the mouth but... In case anyone else uses the solution, there is a small problem with the k loop. The numberofpages would be off by one document after the zeroth one and the pdfReader keeps getting freed and reread for every page, which isn't necessary. I think this is an improvement for that part of the code:
using (MemoryStream msOutput = new MemoryStream())
{
Document doc = new Document();
PdfWriter pCopy = new PdfCopy(doc, msOutput);
doc.Open();
for (int k = 0; k < fileList.Count; k++)
{
PdfReader pdfFile = new PdfReader(fileList[k]);
for (int i = 1; i < pdfFile.NumberOfPages + 1; i++)
{
((PdfCopy)pCopy).AddPage(pCopy.GetImportedPage(pdfFile, i));
}
pCopy.FreeReader(pdfFile);
pdfFile.Close();
}
pCopy.Close();
doc.Close();
fileList.Clear();