Reading attachment from a secured PDF - pdf

I am working on a PDF file, which is a secured one and an excel is attached in the PDF file.
The following is the code i tried.
static void Main(string[] args)
{
Program pgm = new Program();
pgm.EmbedAttachments();
//pgm.ExtractAttachments(pgm.pdfFile);
}
private void ExtractAttachments(string _pdfFile)
{
try
{
if (!Directory.Exists(attExtPath))
Directory.CreateDirectory(attExtPath);
byte[] password = System.Text.ASCIIEncoding.ASCII.GetBytes("TFAER13052016");
//byte[] password = System.Text.ASCIIEncoding.ASCII.GetBytes("Password");
PdfDictionary documentNames = null;
PdfDictionary embeddedFiles = null;
PdfDictionary fileArray = null;
PdfDictionary file = null;
PRStream stream = null;
//PdfReader reader = new PdfReader(_pdfFile);
PdfReader reader = new PdfReader(_pdfFile, password);
PdfDictionary catalog = reader.Catalog;
documentNames = (PdfDictionary)PdfReader.GetPdfObject(catalog.Get(PdfName.NAMES));
if (documentNames != null)
{
embeddedFiles = (PdfDictionary)PdfReader.GetPdfObject(documentNames.Get(PdfName.EMBEDDEDFILES));
if (embeddedFiles != null)
{
PdfArray filespecs = embeddedFiles.GetAsArray(PdfName.NAMES);
for (int i = 0; i < filespecs.Size; i++)
{
i++;
fileArray = filespecs.GetAsDict(i);
file = fileArray.GetAsDict(PdfName.EF);
foreach (PdfName key in file.Keys)
{
stream = (PRStream)PdfReader.GetPdfObject(file.GetAsIndirectObject(key));
string attachedFileName = fileArray.GetAsString(key).ToString();
byte[] attachedFileBytes = PdfReader.GetStreamBytes(stream);
System.IO.File.WriteAllBytes(attExtPath + attachedFileName, attachedFileBytes);
}
}
}
else
throw new Exception("Unable to Read the attachment or There may be no Attachment");
}
else
{
throw new Exception("Unable to Read the document");
}
}
catch (Exception ex)
{
Console.WriteLine(ex.ToString());
Console.ReadKey();
}
}
private void EmbedAttachments()
{
try
{
if (File.Exists(pdfFile))
File.Delete(pdfFile);
Document PDFD = new Document(PageSize.LETTER);
PdfWriter writer;
writer = PdfWriter.GetInstance(PDFD, new FileStream(pdfFile, FileMode.Create));
PDFD.Open();
PDFD.NewPage();
PDFD.Add(new Paragraph("This is test"));
PdfFileSpecification pfs = PdfFileSpecification.FileEmbedded(writer, #"C:\PDFReader\1.xls", "11.xls", null);
//PdfFileSpecification pfs = PdfFileSpecification.FileEmbedded(writer, attFile, "11", File.ReadAllBytes(attFile), true);
writer.AddFileAttachment(pfs);
//writer.AddAnnotation(PdfAnnotation.CreateFileAttachment(writer, new iTextSharp.text.Rectangle(100, 100, 100, 100), "File Attachment", PdfFileSpecification.FileExtern(writer, "C:\\test.xml")));
//writer.Close();
PDFD.Close();
Program pgm=new Program();
using (Stream input = new FileStream(pgm.pdfFile, FileMode.Open, FileAccess.Read, FileShare.Read))
{
using (Stream output = new FileStream(pgm.epdfFile, FileMode.Create, FileAccess.Write, FileShare.None))
{
PdfReader reader = new PdfReader(input);
PdfEncryptor.Encrypt(reader, output, true, "Password", "secret", PdfWriter.ALLOW_SCREENREADERS);
}
}
}
catch (Exception ex)
{
Console.WriteLine(ex.StackTrace.ToString());
Console.ReadKey();
}
}
}
The above code contains the creation of a encrypted PDF with an excel attachment and also to extract the same.
Now the real problem is with the file which I already have as a requirement document(I cannot share the file) which also has an excel attachment like my example.
But the above code works for the secured PDF which i have created but not for the actual Secured PDF.
While debugging, I found that the Issue is with the following code
documentNames = (PdfDictionary)PdfReader.GetPdfObject(catalog.Get(PdfName.NAMES));
In which,
catalog.Get(PdfName.NAMES)
is returned as NULL, Where as the File created by me, provides the expected output.
Please guide me on the above.
TIA.

As mkl suggested, It has been attached as an Annotated attachment. But the reference which is used in the example is provided ZipFile Method is no longer supported. Hence I found an alternate code attached below.
public void ExtractAttachments(byte[] src)
{
PRStream stream = null;
string attExtPath = #"C:\PDFReader\Extract\";
if (!Directory.Exists(attExtPath))
Directory.CreateDirectory(attExtPath);
byte[] password = System.Text.ASCIIEncoding.ASCII.GetBytes("TFAER13052016");
PdfReader reader = new PdfReader(src, password);
for (int i = 1; i <= reader.NumberOfPages; i++)
{
PdfArray array = reader.GetPageN(i).GetAsArray(PdfName.ANNOTS);
if (array == null) continue;
for (int j = 0; j < array.Size; j++)
{
PdfDictionary annot = array.GetAsDict(j);
if (PdfName.FILEATTACHMENT.Equals(
annot.GetAsName(PdfName.SUBTYPE)))
{
PdfDictionary fs = annot.GetAsDict(PdfName.FS);
PdfDictionary refs = fs.GetAsDict(PdfName.EF);
foreach (PdfName name in refs.Keys)
{
//zip.AddEntry(
// fs.GetAsString(name).ToString(),
// PdfReader.GetStreamBytes((PRStream)refs.GetAsStream(name))
//);
stream = (PRStream)PdfReader.GetPdfObject(refs.GetAsIndirectObject(name));
string attachedFileName = fs.GetAsString(name).ToString();
var splitname = attachedFileName.Split('\\');
if (splitname.Length != 1)
attachedFileName = splitname[splitname.Length - 1].ToString();
byte[] attachedFileBytes = PdfReader.GetStreamBytes(stream);
System.IO.File.WriteAllBytes(attExtPath + attachedFileName, attachedFileBytes);
}
}
}
}
}
Please Let me Know if it can be achieved in any other way.
Thanks!!!

Related

iTextSharp Document Isn't Working After Deployment

i'm using iTextSharp to create a pdf document then add it as an attachment to send an email using SendGrid.
The code is working locally but after deploying the project in Azure this function stopped working for some reason. I tried to analyze the problem and i think that the document didn't fully created of attached due to the connection. I can't pin point the exact issue to solve it. Any opinions or discussion is appreciated.
Action:
public async Task<IActionResult> GeneratePDF(int? id, string recipientEmail)
{
//if id valid
if (id == null)
{
return NotFound();
}
var story = await _db.Stories.Include(s => s.Child).Include(s => s.Sentences).ThenInclude(s => s.Image).FirstOrDefaultAsync(s => s.Id == id);
if (story == null)
{
return NotFound();
}
var webRootPath = _hostingEnvironment.WebRootPath;
var path = Path.Combine(webRootPath, "dump"); //folder name
try
{
using (System.IO.MemoryStream memoryStream = new System.IO.MemoryStream())
{
iTextSharp.text.Document document = new iTextSharp.text.Document(iTextSharp.text.PageSize.A4, 10, 10, 10, 10);
PdfWriter writer = PdfWriter.GetInstance(document, memoryStream);
document.Open();
string usedFont = Path.Combine(webRootPath + "\\fonts\\", "arial.TTF");
BaseFont bf = BaseFont.CreateFont(usedFont, BaseFont.IDENTITY_H, BaseFont.EMBEDDED);
iTextSharp.text.Font titleFont = new iTextSharp.text.Font(bf, 40);
iTextSharp.text.Font sentencesFont = new iTextSharp.text.Font(bf, 15);
iTextSharp.text.Font childNamewFont = new iTextSharp.text.Font(bf, 35);
PdfPTable T = new PdfPTable(1);
//Hide the table border
T.DefaultCell.BorderWidth = 0;
T.DefaultCell.HorizontalAlignment = 1;
T.DefaultCell.PaddingTop = 15;
T.DefaultCell.PaddingBottom = 15;
//Set RTL mode
T.RunDirection = PdfWriter.RUN_DIRECTION_RTL;
//Add our text
if (story.Title != null)
{
T.AddCell(new iTextSharp.text.Paragraph(story.Title, titleFont));
}
if (story.Child != null)
{
if (story.Child.FirstName != null && story.Child.LastName != null)
{
T.AddCell(new iTextSharp.text.Phrase(story.Child.FirstName + story.Child.LastName, childNamewFont));
}
}
if (story.Sentences != null)
{
.................
}
document.Add(T);
writer.CloseStream = false;
document.Close();
byte[] bytes = memoryStream.ToArray();
var fileName = path + "\\PDF" + DateTime.Now.ToString("yyyyMMdd-HHMMss") + ".pdf";
using (FileStream fs = new FileStream(fileName, FileMode.Create))
{
fs.Write(bytes, 0, bytes.Length);
}
memoryStream.Position = 0;
memoryStream.Close();
//Send generated pdf as attchment
// Create the file attachment for this email message.
var attachment = Convert.ToBase64String(bytes);
var client = new SendGridClient(Options.SendGridKey);
var msg = new SendGridMessage();
msg.From = new EmailAddress(SD.DefaultEmail, SD.DefaultEmail);
msg.Subject = story.Title;
msg.PlainTextContent = "................";
msg.HtmlContent = "..................";
msg.AddTo(new EmailAddress(recipientEmail));
msg.AddAttachment("Story.pdf", attachment);
try
{
await client.SendEmailAsync(msg);
}
catch (Exception ex)
{
Console.WriteLine("{0} First exception caught.", ex);
}
//Remove form root
if (System.IO.File.Exists(fileName))
{
System.IO.File.Delete(fileName);
}
}
}
catch (FileNotFoundException e)
{
Console.WriteLine($"The file was not found: '{e}'");
}
catch (DirectoryNotFoundException e)
{
Console.WriteLine($"The directory was not found: '{e}'");
}
catch (IOException e)
{
Console.WriteLine($"The file could not be opened: '{e}'");
}
return RedirectToAction("Details", new { id = id });
}
try to edit usedfont variable as bellow :
var usedfont = Path.Combine(webRootPath ,#"\fonts\arial.TTF")
It turns out that the problem is far from iTextSharp. I did a remote debugging from this article.
Two parts of the code was causing the problem.
First, for some reason the folder "dump" was not created on Azure wwwroot folder while locally it is. so, i added these lines:
var webRootPath = _hostingEnvironment.WebRootPath;
var path = Path.Combine(webRootPath, "dump");
if (!Directory.Exists(path)) //Here
Directory.CreateDirectory(path);
Second, after debugging it shows that creating the file was failing every time. I replaced the following lines:
using (FileStream fs = new FileStream(fileName, FileMode.Create))
{
fs.Write(bytes, 0, bytes.Length);
}
memoryStream.Position = 0;
memoryStream.Close();
With:
using (FileStream fs = new FileStream(fileName, FileMode.Create))
using (var binaryWriter = new BinaryWriter(fs))
{
binaryWriter.Write(bytes, 0, bytes.Length);
binaryWriter.Close();
}
memoryStream.Close();
Hope this post helps someone.

iTextSharp: How to add page number to multiple pdf file after merged them together

I'm using iTextSharp, version 5.5.8.0 in a Windows Form application. Here is the code I used to merge two pdf files. My task is to add page number in merged file.
private void MergePDFFiles()
{
string fileSuffix = DateTime.Now.ToString("yyyyMMddHHmmss") + ".pdf";
string[] lstFiles = new string[2];
lstFiles[0] = #"/File_1.pdf";
lstFiles[1] = #"/File_2.pdf";
PdfReader reader = null;
Document sourceDocument = null;
PdfCopy pdfCopyProvider = null;
PdfImportedPage importedPage;
string outputPdfPath = #"/MergedPageNo_" + fileSuffix;
sourceDocument = new Document();
pdfCopyProvider = new PdfCopy(sourceDocument, new System.IO.FileStream(outputPdfPath, System.IO.FileMode.Create));
sourceDocument.Open();
try
{
for (int f = 0; f < lstFiles.Length - 0; f++)
{
int pages = get_pageCount(lstFiles[f]);
reader = new PdfReader(lstFiles[f]);
for (int i = 1; i <= pages; i++)
{
importedPage = pdfCopyProvider.GetImportedPage(reader, i);
pdfCopyProvider.AddPage(importedPage);
}
reader.Close();
}
sourceDocument.Close();
}
catch (Exception ex)
{
throw ex;
}
}`

Merging PDFs using iTextSharp removes Trim Box Detail

I am trying to use iTextSharp to merge 2 or more PDF files. However I am not getting any details about the TrimBox. Performing the code below on the PDF (which was merged) always return NULL
Rectangle rect = reader.GetBoxSize(1, "trim");
This is the code for merging.
public void Merge(List<String> InFiles, String OutFile)
{
using (FileStream stream = new FileStream(OutFile, FileMode.Create))
using (Document doc = new Document())
using (PdfCopy pdf = new PdfCopy(doc, stream))
{
doc.Open();
PdfReader reader = null;
PdfImportedPage page = null;
InFiles.ForEach(file =>
{
reader = new PdfReader(file);
for (int i = 0; i < reader.NumberOfPages; i++)
{
page = pdf.GetImportedPage(reader, i + 1);
pdf.AddPage(page);
}
pdf.FreeReader(reader);
reader.Close();
});
}
}
How to keep I keep the box information after the merge?
-Alan-
Here is the code I created to merge Portrait and Landscape docs using iTextSharp. It works rather well.
public void MergeFiles(System.Collections.Generic.List<string> sourceFiles, string destinationFile)
{
Document document=null;
if (System.IO.File.Exists(destinationFile))
System.IO.File.Delete(destinationFile);
try
{
PdfCopy writer = null;
int numberOfPages=0;
foreach(string sourceFile in sourceFiles)
{
PdfReader reader = new PdfReader(sourceFile);
reader.ConsolidateNamedDestinations();
numberOfPages = reader.NumberOfPages;
if(document==null)
{
document = new Document(reader.GetPageSizeWithRotation(1));
writer = new PdfCopy(document, new FileStream(destinationFile, FileMode.Create));
document.Open();
}
for (int x = 1;x <= numberOfPages;x++ )
{
if (writer != null)
{
PdfImportedPage page = writer.GetImportedPage(reader, x);
writer.AddPage(page);
}
}
PRAcroForm form = reader.AcroForm;
if (form != null && writer != null)
writer.CopyAcroForm(reader);
}
}
finally
{
if (document != null && document.IsOpen())
document.Close();
}
}

I can't flatten merged PDF with itextsharp

I have 3 pdfs templates that i Fill out and save to a memorystream them merge them together into 1 pdf into a memorystream. The resulting merged pdf is non flattenned. I tried reading the merged pdf and flattening it but it say "pdf header signature not found". Is there a way to flattened merged pdf from a memorystream.
this is what I tried so far without success.
var mergedPdf = MergePDFs(AllPdfs);
MemoryStream flattenedMergedPdf = new MemoryStream();
PdfReader Finalpdf = new PdfReader(mergedPdf);
PdfStamper StampFinalMerged = new PdfStamper(Finalpdf,flattenedMergedPdf);
StampFinalMerged.FormFlattening=true;
return flattenedMergedPdf;
the method i use to merge the pdfs(found on SO) is below:
public MemoryStream MergePDFs(List<byte[]> pdfFiles)
{
if (pdfFiles.Count > 1)
{
PdfReader finalPdf;
Document pdfContainer;
PdfCopy pdfCopy;
MemoryStream msFinalPdf = new MemoryStream();
finalPdf = new PdfReader(pdfFiles[0]);
pdfContainer = new Document();
pdfCopy = new PdfSmartCopy(pdfContainer, msFinalPdf);
pdfContainer.Open();
for (int k = 0; k < pdfFiles.Count; k++)
{
finalPdf = new PdfReader(pdfFiles[k]);
for (int i = 1; i < finalPdf.NumberOfPages + 1; i++)
{
((PdfSmartCopy)pdfCopy).AddPage(pdfCopy.GetImportedPage(finalPdf, i));
}
pdfCopy.FreeReader(finalPdf);
}
//tried this here but no success
// PdfReader FinalRead = new PdfReader(finalPdf);
//MemoryStream FinalMS = new MemoryStream();
//var finalStamper = new PdfStamper(FinalRead, FinalMS);
//finalStamper.FormFlattening = true;
pdfCopy.CloseStream = false;
pdfCopy.Close();
finalPdf.Close();
//return FinalMS
return msFinalPdf;
}
Any help will be much appreciated. Thanks
You can try another approach to flatten PDF templates first then merge them into a final PDF file:
string newFile = "... path to the merged pdf file ...";
string[] pdfTemplates = new string[3];
string pdfTemplates[0] = "... path to the 1st pdf template ...";
string pdfTemplates[1] = "... path to the 2nd pdf template ...";
string pdfTemplates[2] = "... path to the 3rd pdf template ...";
string[] pdfFiles = new string[3];
string pdfFiles[0] = "... path to the 1st temp pdf file ...";
string pdfFiles[1] = "... path to the 2nd temp pdf file ...";
string pdfFiles[2] = "... path to the 3rd temp pdf file ...";
for (int i = 0; i < 3; i++)
{
PdfReader pdfReader = new PdfReader(pdfTemplates[i]);
PdfStamper pdfStamper = new PdfStamper(pdfReader, new FileStream(pdfFiles[i], FileMode.Create));
pdfStamper.FormFlattening = true;
pdfStamper.Close();
}
MergeFiles(newFile, pdfFiles);
for (int i = 0; i < 3; i++)
{
FileInfo fi = new FileInfo(pdfFiles[i]);
fi.Delete();
}
private void MergeFiles(string destinationFile, string[] sourceFiles)
{
try
{
int f = 0;
PdfReader reader = new PdfReader(sourceFiles[f]);
int n = reader.NumberOfPages;
Document document = new Document(reader.GetPageSizeWithRotation(1));
PdfWriter writer = PdfWriter.GetInstance(document, new FileStream(destinationFile, FileMode.Create));
document.Open();
PdfContentByte cb = writer.DirectContent;
PdfImportedPage page;
int rotation;
while (f < sourceFiles.Length)
{
int i = 0;
while (i < n)
{
i++;
document.SetPageSize(reader.GetPageSizeWithRotation(i));
document.NewPage();
page = writer.GetImportedPage(reader, i);
rotation = reader.GetPageRotation(i);
if (rotation == 90 || rotation == 270)
{
cb.AddTemplate(page, 0, -1f, 1f, 0, 0, reader.GetPageSizeWithRotation(i).Height);
}
else
{
cb.AddTemplate(page, 1f, 0, 0, 1f, 0, 0);
}
}
f++;
if (f < sourceFiles.Length)
{
reader = new PdfReader(sourceFiles[f]);
n = reader.NumberOfPages;
}
}
document.Close();
}
catch (Exception ex)
{
MessageBox.Show(ex.Message, "Error", MessageBoxButtons.OK, MessageBoxIcon.Error);
}
}

Merging PDFs with ITextSharp

What is the optimum way to merge 2 PDF files with ITextSharp in C#? I'm using ASP.NET/.NET3.5.
public static void Merge(List<String> InFiles, String OutFile)
{
using (FileStream stream = new FileStream(OutFile, FileMode.Create))
using (Document doc = new Document())
using (PdfCopy pdf = new PdfCopy(doc, stream))
{
doc.Open();
PdfReader reader = null;
PdfImportedPage page = null;
//fixed typo
InFiles.ForEach(file =>
{
reader = new PdfReader(file);
for (int i = 0; i < reader.NumberOfPages; i++)
{
page = pdf.GetImportedPage(reader, i + 1);
pdf.AddPage(page);
}
pdf.FreeReader(reader);
reader.Close();
});
}
}
The last answer works if you don't want to delete the original files. In my case, I want to delete and when I tried I got exception. My solution is:
public static bool MergePDFs(List<String> InFiles, String OutFile)
{
bool merged = true;
try
{
List<PdfReader> readerList = new List<PdfReader>();
foreach (string filePath in InFiles)
{
PdfReader pdfReader = new PdfReader(filePath);
readerList.Add(pdfReader);
}
//Define a new output document and its size, type
Document document = new Document(PageSize.A4, 0, 0, 0, 0);
//Create blank output pdf file and get the stream to write on it.
PdfWriter writer = PdfWriter.GetInstance(document, new FileStream(OutFile, FileMode.Create));
document.Open();
foreach (PdfReader reader in readerList)
{
PdfReader.unethicalreading = true;
for (int i = 1; i <= reader.NumberOfPages; i++)
{
PdfImportedPage page = writer.GetImportedPage(reader, i);
document.Add(iTextSharp.text.Image.GetInstance(page));
}
}
document.Close();
foreach (PdfReader reader in readerList)
{
reader.Close();
}
}
catch (Exception ex)
{
merged = false;
}
return merged;
}
I copied the code from Original Code