How to merge PDFs into a PDF Portfolio? - pdf

I am looking for the functionality that creates PDF Portfolios:
The image shows the free adobe reader that can be downloaded from Adobe (duh!). When I open this particular PDF, I was surprised that it has all these Layout, Files and Attachment features. It is definitely not the normal "PDF merge". It is more like a package with multiple PDFs.
Can iTextSharp do this? What is the search term for this PDF functionality?

The term you're looking for is PDF Portfolios. You can create PDFs like this with iTextSharp. Here are a couple of C# examples from the iText book:
Chapter16 - KubrickCollection
Chapter16 - KubrickMovies
If you choose to download the KubrickMovies result file, change the extension to ".pdf". Just noticed it now - will try and fix the error this weekend.

To generate a PDF portfolio with iTextSharp, first create a collection and then store the files in it.
The example below reads one PDF file, builds a collection of two attachments from it (hello.pdf and united_states.pdf), and saves the result as Test.pdf:
/// <summary>
/// Writes "Test.pdf" as a PDF portfolio (collection) that embeds "somePDF.pdf"
/// twice, once described as "united_states.pdf" and once as "hello.pdf".
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    var filePath = @"somePDF.pdf";
    var fileInfo = new FileInfo(filePath);

    Document pdfDoc = null;
    FileStream fstr = null;
    try
    {
        pdfDoc = new Document(PageSize.A4);
        fstr = new FileStream("Test.pdf", FileMode.Create);
        var pdfWriter = PdfWriter.GetInstance(pdfDoc, fstr);
        pdfDoc.Open();

        // An (empty) chunk is added so the host document has content before it is closed.
        pdfDoc.Add(new Chunk());

        // Marking the writer with a collection is what turns the attachments into a portfolio.
        pdfWriter.Collection = new PdfCollection(PdfCollection.TILE);

        // File parameters carrying the source file's last-write time; passed to FileEmbedded below.
        var pdfDictionary = new PdfDictionary();
        pdfDictionary.Put(PdfName.MODDATE, new PdfDate(fileInfo.LastWriteTime));

        PdfFileSpecification fileSpec = PdfFileSpecification.FileEmbedded(
            pdfWriter,
            filePath,
            fileInfo.Name,
            null,
            true,
            null,
            pdfDictionary);
        pdfWriter.AddFileAttachment("united_states.pdf", fileSpec);

        fileSpec = PdfFileSpecification.FileEmbedded(
            pdfWriter,
            filePath,
            fileInfo.Name,
            null,
            true,
            null,
            pdfDictionary);
        pdfWriter.AddFileAttachment("hello.pdf", fileSpec);

        pdfDoc.Close();
    }
    finally
    {
        // Either object may still be null if its constructor threw.
        if (pdfDoc != null && pdfDoc.IsOpen())
        {
            pdfDoc.Close();
        }

        if (fstr != null)
        {
            fstr.Close();
        }
    }
}

Here is a simple sample showing how to attach files to a new PDF file:
using System.Diagnostics;
using System.IO;
using iTextSharp.text;
using iTextSharp.text.pdf;
namespace PDFAttachment
{
    /// <summary>
    /// Sample: creates "Test.pdf" containing the text "Test" and one embedded file attachment.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Builds the PDF, attaches <c>C:\path\logo.png</c> to it and opens the result.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // The output stream is owned here so it is released even if the writer cannot be created.
            using (var output = new FileStream("Test.pdf", FileMode.Create))
            using (var pdfDoc = new Document(PageSize.A4))
            {
                var pdfWriter = PdfWriter.GetInstance(pdfDoc, output);
                pdfDoc.Open();
                pdfDoc.Add(new Phrase("Test"));

                var filePath = @"C:\path\logo.png";
                var fileInfo = new FileInfo(filePath);

                // File parameters: record the attachment's last-write time.
                var pdfDictionary = new PdfDictionary();
                pdfDictionary.Put(PdfName.MODDATE, new PdfDate(fileInfo.LastWriteTime));

                var fs = PdfFileSpecification.FileEmbedded(pdfWriter, filePath, fileInfo.Name, null, true, null, pdfDictionary);
                pdfWriter.AddFileAttachment("desc.", fs);
            }

            Process.Start("Test.pdf");
        }
    }
}
Or to an existing PDF file:
using System.Diagnostics;
using System.IO;
using iTextSharp.text;
using iTextSharp.text.pdf;
namespace PDFAttachment
{
    /// <summary>
    /// Sample: copies "Test.pdf" to "newTest.pdf" with one additional embedded file attachment.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Stamps the attachment onto the existing PDF and opens the result.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            var reader = new PdfReader("Test.pdf");
            try
            {
                using (var output = new FileStream("newTest.pdf", FileMode.Create))
                using (var stamper = new PdfStamper(reader, output))
                {
                    var filePath = @"C:\path\logo.png";
                    addAttachment(stamper, filePath, "desc.");
                    // No explicit stamper.Close(): leaving the using block closes the stamper.
                }
            }
            finally
            {
                reader.Close();
            }

            Process.Start("newTest.pdf");
        }

        /// <summary>
        /// Embeds the file at <paramref name="filePath"/> into the stamped document.
        /// </summary>
        /// <param name="stamper">Stamper whose writer receives the embedded file.</param>
        /// <param name="filePath">Path of the file to embed; its file name is used as the display name.</param>
        /// <param name="description">Description passed to <c>AddFileAttachment</c>.</param>
        private static void addAttachment(PdfStamper stamper, string filePath, string description)
        {
            var fileInfo = new FileInfo(filePath);

            // File parameters: record the attachment's last-write time.
            var pdfDictionary = new PdfDictionary();
            pdfDictionary.Put(PdfName.MODDATE, new PdfDate(fileInfo.LastWriteTime));

            var pdfWriter = stamper.Writer;
            var fs = PdfFileSpecification.FileEmbedded(pdfWriter, filePath, fileInfo.Name, null, true, null, pdfDictionary);
            stamper.AddFileAttachment(description, fs);
        }
    }
}

Related

Fill XFA without breaking usage rights

I have an XFA form that I can successfully fill in by extracting the XML modifying and writing back. Works great if you have the full Adobe Acrobat, but fails with Adobe Reader. I have seen various questions on the same thing with answers but they were some time ago so updating an XFA that is readable by Adobe Reader may no longer be doable?
I use this code below and I've utilised the StampingProperties of append as in the iText example but still failing. I'm using iText 7.1.15.
// Open the source PDF in append mode and write the result to a new file.
PdfDocument pdf = new(new PdfReader(FileToProcess), new PdfWriter(NewPDF), new StampingProperties().UseAppendMode());
PdfAcroForm form = PdfAcroForm.GetAcroForm(pdf, true);
XfaForm xfa = form.GetXfaForm();

// Start from the datasets node and descend to its first child element named "data", if any.
XElement node = xfa.GetDatasetsNode();
foreach (XNode item in node.Nodes())
{
    if (item is XElement element && "data".Equals(element.Name.LocalName))
    {
        node = element;
        break;
    }
}

// Dump the selected node to XMLOutput as indented XML (one element per line).
XmlWriterSettings settings = new() { Indent = true };
using (XmlWriter writer = XmlWriter.Create(XMLOutput, settings))
{
    node.WriteTo(writer);
    writer.Flush();
}

// When updating, copy the dump to a temp file while stripping lines.
if (update)
{
    string TempXML = CSTrackerHelper.MakePath($"{AppContext.BaseDirectory}Temp", $"{Guid.NewGuid()}.XML");
    using (StreamReader fsin = new(XMLOutput))
    using (StreamWriter fsout = new(TempXML))
    {
        string linedata = string.Empty;
        int cnt = 0;
        while (!fsin.EndOfStream)
        {
            // Each pass writes the line read on the PREVIOUS pass. The cnt != 3 test
            // therefore drops the third line of the file, empty lines are skipped, and
            // the final line read is never written.
            // NOTE(review): presumably the third and last lines are the opening and closing
            // tags of a wrapper element - confirm against a real dump.
            if (cnt != 3 && linedata != string.Empty)
            {
                fsout.WriteLine(linedata);
            }
            linedata = fsin.ReadLine();
            cnt++;
        }
    }
    XMLOutput = TempXML;
}

xlogger.Info("Populating pdf fields");

// Load the dump and apply every configured field value to it.
XmlDocument xmldoc = new();
xmldoc.Load(XMLOutput);
XmlNamespaceManager xmlnsManager = new(xmldoc.NameTable);
xmlnsManager.AddNamespace("xfa", @"http://www.xfa.org/schema/xfa-data/1.0/");
string[] FieldValues;
string[] MultiNodes;
foreach (KeyValuePair<string, DocumentFieldData> v in DocumentData.FieldData)
{
    if (!string.IsNullOrEmpty(v.Value.Field))
    {
        // A field entry may list several XPath targets separated by ';'.
        FieldValues = v.Value.Field.Contains(";") ? v.Value.Field.Split(';') : (new string[] { v.Value.Field });
        foreach (string FValue in FieldValues)
        {
            XmlNodeList aNodes;
            if (FValue.Contains("{"))
            {
                // "xpath{n}" form: select with the part before the last '{' and use n as the node index.
                aNodes = xmldoc.SelectNodes(FValue.Substring(0, FValue.LastIndexOf("{")), xmlnsManager);
                if (aNodes.Count > 1)
                {
                    // We have a multinode
                    MultiNodes = FValue.Split('{');
                    int NodeIndex = int.Parse(MultiNodes[1].Replace("}", ""));
                    aNodes[NodeIndex].InnerText = v.Value.Data;
                }
            }
            else
            {
                // Plain XPath: update the first match only.
                aNodes = xmldoc.SelectNodes(FValue, xmlnsManager);
                if (aNodes.Count >= 1)
                {
                    aNodes[0].InnerText = v.Value.Data;
                }
            }
        }
    }
}
xmldoc.Save(XMLOutput);

// Now that the XML is updated, apply it to the PDF.
using (FileStream xmlStream = new(XMLOutput, FileMode.Open, FileAccess.Read))
{
    xfa.FillXfaForm(xmlStream);
}
xfa.Write(pdf);
pdf.Close();
FYI, I've also tried setting a field directly, with the same result:
// Alternative attempt: open the document in append mode and set one XFA field by its full name.
var sourceReader = new PdfReader(source);
var destinationWriter = new PdfWriter(dest);
var stampedDocument = new PdfDocument(sourceReader, destinationWriter, new StampingProperties().UseAppendMode());

var acroForm = PdfAcroForm.GetAcroForm(stampedDocument, true);
var xfaForm = acroForm.GetXfaForm();

xfaForm.SetXfaFieldValue("VRM[0].CoverPage[0].Wrap2[0].Table[0].CSID[0]", "Test");

// Write the XFA data back through the AcroForm, then finish the document.
xfaForm.Write(acroForm);
stampedDocument.Close();
If anyone has any ideas it would be appreciated.
Cheers
I ran into a very similar issue. I was attempting to auto-fill an XFA form that was password protected without breaking the certificate or usage rights (the form allowed filling). iText 7 seems to have made this impossible for legal/practical reasons; however, it is still very much possible with iText 5. I wrote the following working code using iTextSharp (the C# version of iText 5):
using iTextSharp.text;
using iTextSharp.text.pdf;
// Locations of the empty protected form, the filled copy to produce, and the XML data to inject.
var emptyFormPath = "/Users/home/Desktop/c#pdfParser/encrypted_empty.pdf";
var filledFormPath = "/Users/home/Desktop/c#pdfParser/xfa_encrypted_filled.pdf";
var xmlDataPath = "/Users/home/Desktop/c#pdfParser/sample_data.xml";

FillByItextSharp5(emptyFormPath, filledFormPath, xmlDataPath);
/// <summary>
/// Fills the XFA form in <paramref name="pathToRead"/> with the XML in <paramref name="data"/>
/// and writes the result to <paramref name="pathToSave"/> using an append-mode stamper.
/// </summary>
/// <param name="pathToRead">Path of the source PDF.</param>
/// <param name="pathToSave">Path of the filled PDF to create (overwritten if present).</param>
/// <param name="data">Path of the XML file passed to <c>FillXfaForm</c>.</param>
static void FillByItextSharp5(string pathToRead, string pathToSave, string data)
{
    // Both inputs are only read, so they are opened read-only; this also works for read-only files.
    using (FileStream pdf = new FileStream(pathToRead, FileMode.Open, FileAccess.Read))
    using (FileStream xml = new FileStream(data, FileMode.Open, FileAccess.Read))
    using (FileStream filledPdf = new FileStream(pathToSave, FileMode.Create))
    {
        // NOTE(review): this is a process-wide static switch; presumably it lets the reader open
        // the protected file without the owner password - confirm you are entitled to do so.
        PdfReader.unethicalreading = true;

        PdfReader pdfReader = new PdfReader(pdf);
        try
        {
            // '\0' keeps the PDF version; the final 'true' opens the stamper in append mode.
            PdfStamper stamper = new PdfStamper(pdfReader, filledPdf, '\0', true);
            try
            {
                stamper.AcroFields.Xfa.FillXfaForm(xml, true);
            }
            finally
            {
                stamper.Close();
            }
        }
        finally
        {
            pdfReader.Close();
        }
    }
}
PdfStamper stamper = new PdfStamper(pdfReader, filledPdf, '\0', true)
You have to use this line: the final `true` argument opens the stamper in append mode.

Converting dynamic cshtml to pdf using iTextSharp?

I was using Rotativa to convert HTML to PDF. My aim was to send the receipt as a PDF to the customer's email address. It worked well locally, but when deployed to a GoDaddy server it is not supported. So I am planning to generate the PDF using iTextSharp (other opinions are also welcome).
Code written for the generation of pdf in Rotativa:
/// <summary>
/// Renders the "GetPdfReceipt" action to PDF with Rotativa and returns it as "Receipt.pdf".
/// </summary>
/// <param name="RegId">Registration id forwarded to the rendered action as a route value.</param>
/// <returns>A file result containing the generated PDF bytes.</returns>
public ActionResult GetPdfReceipt(int RegId)
{
    // The route value must use the parameter itself; the original referenced an undeclared "regId".
    // NOTE(review): the target action name equals this method's name - presumably the
    // view-rendering action lives in another controller; confirm to avoid recursion.
    var actionPDF = new Rotativa.ActionAsPdf("GetPdfReceipt", new { RegId = RegId })
    {
        FileName = "Receipt.pdf"
    };

    // Dynamic student receipt PDF as a byte array (also usable as an e-mail attachment).
    var byteArrayDynamic = actionPDF.BuildPdf(ControllerContext);

    return File(byteArrayDynamic, "application/pdf", "Receipt.pdf");
}
/// <summary>
/// Builds the receipt view model and renders the "Receipts" view with it.
/// </summary>
/// <param name="RegId">Registration id of the receipt to render.</param>
/// <returns>The "Receipts" view, or the view named "" if building the model throws.</returns>
public ActionResult GetPdfReceipt(int RegId)
{
    try
    {
        var _mdlReceiptPdf = new ReceiptPdfVM
        {
            ...
            ...
        };
        return View("Receipts", _mdlReceiptPdf);
    }
    catch (Exception)
    {
        // Deliberate best-effort fallback kept from the original; the exception is not inspected,
        // so no variable is declared for it.
        return View("");
    }
}
I have gone through some code for generating a PDF using iTextSharp, and it was as follows:
// Stream GridView1, rendered as HTML, to the browser as a downloadable PDF.
var response = HttpContext.Current.Response;
response.Clear();
response.Buffer = true;
response.Charset = "";
response.ContentType = "application/pdf";
response.AddHeader("content-disposition", "attachment;filename=LoginReportPerDay.pdf");

// Render the grid's HTML into a string.
StringWriter sWriter = new StringWriter();
HtmlTextWriter hTWriter = new HtmlTextWriter(sWriter);
GridView1.RenderControl(hTWriter);
StringReader sReader = new StringReader(sWriter.ToString());

// The writer is bound to the response stream, so the PDF bytes are emitted by the
// Parse/Close calls below.
Document pdf = new Document(PageSize.A4);
HTMLWorker worker = new HTMLWorker(pdf);
PdfWriter.GetInstance(pdf, response.OutputStream);
pdf.Open();
worker.Parse(sReader);
pdf.Close();

// Response.Write(pdf) was removed: it appended the Document object's ToString() text
// after the PDF bytes that had already been written to the output stream.
response.Flush();
response.End();
How can I use the above code to render the dynamically generated Receipt view as a PDF byte array?

How to remove one indirectly referenced image from a PDF and keep all others?

I would like to parse a PDF and find the logo via known attributes and when I find a match, remove that image and then copy everything else.
I am using the code below to replace an image with a blank white image to remove a logo from PDFs that are to be printed on letterhead. It replaces the image with a white image of the same size. Is there a way to modify this to actually remove the image (and thus save some space, etc.?).
/// <summary>
/// Runs <c>ManipulatePdf</c> on <c>C:\in.pdf</c>, writing <c>C:\out.pdf</c>, then waits for a key press.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
private static void Main(string[] args)
{
    // Verbatim string literals take the '@' prefix.
    ManipulatePdf(@"C:\in.pdf", @"C:\out.pdf");
    Console.WriteLine("Finished - press a key");
    Console.ReadKey();
}
/// <summary>
/// Copies <paramref name="src"/> to <paramref name="dest"/>, replacing every XObject stream on
/// page 1 that is larger than 100000 bytes with a 1x1 blank image.
/// </summary>
/// <param name="src">Path of the PDF to read.</param>
/// <param name="dest">Path of the PDF to create (overwritten if present).</param>
public static void ManipulatePdf(String src, String dest)
{
    Console.WriteLine("Start");
    PdfReader reader = new PdfReader(src);
    try
    {
        // Only the first page's resources are inspected.
        // NOTE(review): the original comment assumed all resources are reachable from
        // every page - confirm for documents whose pages carry different resources.
        PdfDictionary page = reader.GetPageN(1);
        PdfDictionary resources = page.GetAsDict(PdfName.RESOURCES);
        PdfDictionary xobjects = resources == null ? null : resources.GetAsDict(PdfName.XOBJECT);

        if (xobjects != null)
        {
            foreach (PdfName pdfName in xobjects.Keys)
            {
                // Entries that are not streams come back as null and are skipped.
                PRStream stream = xobjects.GetAsStream(pdfName) as PRStream;
                if (stream != null && stream.Length > 100000)
                {
                    PdfImage image = new PdfImage(MakeBlankImg(), "", null);
                    Console.WriteLine("Calling replace stream");
                    ReplaceStream(stream, image);
                }
            }
        }

        // The stamper writes the modified reader content to dest when it is closed.
        using (FileStream output = new FileStream(dest, FileMode.Create))
        {
            PdfStamper stamper = new PdfStamper(reader, output);
            stamper.Close();
        }
    }
    finally
    {
        reader.Close();
    }
}
/// <summary>
/// Creates a 1x1 white JPEG in memory and wraps it as an iTextSharp image.
/// </summary>
/// <returns>An iTextSharp image built from the JPEG bytes.</returns>
public static iTextSharp.text.Image MakeBlankImg()
{
    Console.WriteLine("Making small blank image");

    byte[] jpegBytes;
    using (MemoryStream buffer = new MemoryStream())
    using (Bitmap pixel = new Bitmap(1, 1))
    {
        // Paint the single pixel white; the Graphics object is released before saving.
        using (Graphics canvas = Graphics.FromImage(pixel))
        {
            canvas.Clear(Color.White);
            canvas.Flush();
        }

        pixel.Save(buffer, ImageFormat.Jpeg);
        jpegBytes = buffer.ToArray();
    }

    Console.WriteLine("Image array is " + jpegBytes.Length + " bytes.");
    return iTextSharp.text.Image.GetInstance(jpegBytes);
}
/// <summary>
/// Overwrites <paramref name="orig"/> in place with the content and dictionary entries of
/// <paramref name="stream"/>.
/// </summary>
/// <param name="orig">Stream read from the source PDF; cleared and refilled.</param>
/// <param name="stream">Stream supplying the new content and keys.</param>
public static void ReplaceStream(PRStream orig, PdfStream stream)
{
    orig.Clear();

    // Serialize the replacement's content and install it as the original's data.
    MemoryStream buffer = new MemoryStream();
    stream.WriteContent(buffer);
    byte[] payload = buffer.ToArray();
    orig.SetData(payload, false);

    Console.WriteLine("Iterating keys");
    foreach (PdfName key in stream.Keys)
    {
        Console.WriteLine("Key: " + key.ToString());
        orig.Put(key, stream.Get(key));
    }
}
}

Html to pdf some characters are missing (itextsharp) in Asp.Net MVC Application

I want to export razor view to pdf by using the itextsharp library. The problem is that some turkish characters such as İ,ı,Ş,ş etc... are missing in the pdf document. The code used to export the pdf is:
/// <summary>
/// Creates a result for the given view model.
/// </summary>
/// <param name="model">Object stored in <c>Model</c>.</param>
public PdfActionResult(object model)
{
    this.Model = model;
}
/// <summary>
/// Renders the view to HTML, converts it to PDF with XMLWorker and writes the PDF to the
/// response as "application/pdf".
/// </summary>
/// <param name="context">Controller context used to locate and render the view.</param>
/// <exception cref="InvalidOperationException">The view named by <c>ViewName</c> cannot be found.</exception>
public override void ExecuteResult(ControllerContext context)
{
    if (ViewName == null)
    {
        // Default to the name of the executing action.
        ViewName = context.RouteData.GetRequiredString("action");
    }

    context.Controller.ViewData.Model = Model;

    IView view = ViewEngines.Engines.FindView(context, ViewName, null).View;
    if (view == null)
    {
        throw new InvalidOperationException("View '" + ViewName + "' was not found.");
    }

    // Render the view to an HTML string before any PDF objects are created.
    var sb = new StringBuilder();
    using (TextWriter tr = new StringWriter(sb))
    {
        var viewContext = new ViewContext(context, view, context.Controller.ViewData,
            context.Controller.TempData, tr);
        view.Render(viewContext, tr);
    }

    using (var workStream = new MemoryStream())
    {
        var document = new Document();
        PdfWriter writer = PdfWriter.GetInstance(document, workStream);
        // Keep workStream readable after the document is closed.
        writer.CloseStream = false;
        document.Open();

        // The HTML is encoded as UTF-8 and the parser is told so. Encoding to an ANSI code page
        // and passing a null charset loses characters outside that code page.
        // NOTE(review): the font applied to the text must also contain the glyphs
        // (e.g. the Turkish letters in question) - confirm the view's CSS selects such a font.
        using (Stream stream = new MemoryStream(Encoding.UTF8.GetBytes(sb.ToString())))
        {
            XMLWorkerHelper.GetInstance().ParseXHtml(writer, document, stream, Encoding.UTF8);
        }

        document.Close();
        new FileContentResult(workStream.ToArray(), "application/pdf").ExecuteResult(context);
    }
}
}
Then I access it as:
/// <summary>
/// Looks up the student by GUID and returns it wrapped in a <c>PdfActionResult</c>.
/// </summary>
/// <param name="studentId">Identifier passed to the repository lookup.</param>
/// <returns>A <c>PdfActionResult</c> built from the repository's result.</returns>
public ActionResult StudentPdf(Guid studentId)
{
    return new PdfActionResult(_studentRepository.GetByIdGuid(studentId));
}
Thanks for the reply.
This way you can print all Turkish characters:
// Convert the HTML in `html` to a PDF file under ~/Content/Pdf/, using a registered
// TrueType font with Identity-H encoding for the body text.
String htmlText = html.ToString();
string filePath = HostingEnvironment.MapPath("~/Content/Pdf/");

// Register arial.ttf under the alias "Garamond"; the stylesheet below refers to that alias.
FontFactory.Register(_webHelper.MapPath("~/App_Data/Pdf/arial.ttf"), "Garamond"); // just give a path of arial.ttf
StyleSheet css = new StyleSheet();
css.LoadTagStyle("body", "face", "Garamond");
css.LoadTagStyle("body", "encoding", "Identity-H");
css.LoadTagStyle("body", "size", "12pt");

using (var output = new FileStream(filePath + "\\pdf-" + Name + ".pdf", FileMode.Create))
{
    Document document = new Document();
    PdfWriter.GetInstance(document, output);
    document.Open();

    iTextSharp.text.html.simpleparser.HTMLWorker hw = new iTextSharp.text.html.simpleparser.HTMLWorker(document);
    hw.SetStyleSheet(css);
    hw.Parse(new StringReader(htmlText));

    // The original snippet never closed the document; closing it finishes the PDF.
    document.Close();
}
Hope this helps
Regards,
Vinit Patel

iTextSharp XmlWorker: right-to-left

After a long time of struggling with this not-so-friendly API, I am finally making progress, but now I've come to a really nasty issue.. I have placed "dir" attributes in various places in my html with the value being "rtl".. but the XMLWorker doesn't seem to respect that at all. Does anyone know of a workaround? Here's my method:
/// <summary>
/// Renders a Razor template with <paramref name="model"/> and converts the resulting markup to a
/// PDF file through an XMLWorker pipeline (CSS resolver, then HTML, then PDF writer).
/// </summary>
/// <typeparam name="TModel">Type of the model passed to the template.</typeparam>
/// <param name="templateFile">Path of the Razor template to read.</param>
/// <param name="model">Model passed to <c>Razor.Parse</c>.</param>
/// <param name="outputFile">Path of the PDF to create (overwritten if present).</param>
/// <param name="fonts">Fonts to register with the font provider; null is treated as none.</param>
public static void Generate<TModel>(string templateFile, TModel model, string outputFile, IEnumerable<string> fonts)
{
    string template = System.IO.File.ReadAllText(templateFile);
    string result = Razor.Parse(template, model);

    using (var fsOut = new FileStream(outputFile, FileMode.Create, FileAccess.Write))
    using (var stringReader = new StringReader(result))
    {
        var document = new Document();
        var pdfWriter = PdfWriter.GetInstance(document, fsOut);
        pdfWriter.InitialLeading = 12.5f;
        document.Open();

        // Make the caller's fonts available to the CSS appliers.
        var xmlWorkerFontProvider = new XMLWorkerFontProvider();
        if (fonts != null)
        {
            foreach (string font in fonts)
            {
                xmlWorkerFontProvider.Register(font);
            }
        }

        var cssAppliers = new CssAppliersImpl(xmlWorkerFontProvider);
        var htmlContext = new HtmlPipelineContext(cssAppliers);
        htmlContext.SetTagFactory(Tags.GetHtmlTagProcessorFactory());

        // No stylesheet file is added to the resolver here.
        var cssResolver = new StyleAttrCSSResolver();

        // Pipelines are chained from last stage to first.
        var pdfWriterPipeline = new PdfWriterPipeline(document, pdfWriter);
        var htmlPipeline = new HtmlPipeline(htmlContext, pdfWriterPipeline);
        var cssResolverPipeline = new CssResolverPipeline(cssResolver, htmlPipeline);

        var xmlWorker = new XMLWorker(cssResolverPipeline, true);
        var xmlParser = new XMLParser(xmlWorker);
        xmlParser.Parse(stringReader);

        document.Close();
    }
}
I've created a sample to show how to parse and display RTL data using XMLWorker. Download it from here.