How to add a comment to a cell in Excel 2007 using the Open XML SDK 2.0? - excel-2007

Has anyone ever had any luck figuring out how to add a comment to Excel using the Open XML SDK 2.0? I couldn't find any documentation on where to get started on this issue.

The below code will take the worksheet that you want to add comments to and then iterate over the commentsToAdd dictionary. The dictionary key is the cell reference (ie. A1) and the value is the comment text to be added.
/// <summary>
/// Adds all the comments defined in the commentsToAddDict dictionary to the worksheet.
/// One VML drawing part is written containing a note shape per comment, and one
/// Comment element per entry is appended to the worksheet's comments part.
/// </summary>
/// <param name="worksheetPart">Worksheet Part that receives the comments</param>
/// <param name="commentsToAddDict">Dictionary of cell references as the key (e.g. A1) and the comment text as the value</param>
public static void InsertComments(WorksheetPart worksheetPart, Dictionary<string, string> commentsToAddDict)
{
    if (!commentsToAddDict.Any())
    {
        return;
    }

    // Create all the comment VML Shape XML. A StringBuilder is used so the accumulated
    // markup is not re-copied for every additional comment.
    StringBuilder commentsVmlXml = new StringBuilder();
    foreach (var commentToAdd in commentsToAddDict)
    {
        commentsVmlXml.Append(GetCommentVMLShapeXML(GetColumnName(commentToAdd.Key), GetRowIndex(commentToAdd.Key).ToString()));
    }

    // The VMLDrawingPart should contain all the definitions for how to draw every comment shape for the worksheet
    VmlDrawingPart vmlDrawingPart = worksheetPart.AddNewPart<VmlDrawingPart>();
    using (XmlTextWriter writer = new XmlTextWriter(vmlDrawingPart.GetStream(FileMode.Create), Encoding.UTF8))
    {
        writer.WriteRaw("<xml xmlns:v=\"urn:schemas-microsoft-com:vml\"\r\n xmlns:o=\"urn:schemas-microsoft-com:office:office\"\r\n xmlns:x=\"urn:schemas-microsoft-com:office:excel\">\r\n <o:shapelayout v:ext=\"edit\">\r\n <o:idmap v:ext=\"edit\" data=\"1\"/>\r\n" +
            "</o:shapelayout><v:shapetype id=\"_x0000_t202\" coordsize=\"21600,21600\" o:spt=\"202\"\r\n path=\"m,l,21600r21600,l21600,xe\">\r\n <v:stroke joinstyle=\"miter\"/>\r\n <v:path gradientshapeok=\"t\" o:connecttype=\"rect\"/>\r\n </v:shapetype>"
            + commentsVmlXml.ToString() + "</xml>");
    }

    // The parts and container elements below are the same for every comment, so they are
    // resolved (or created) once here instead of once per comment.
    WorksheetCommentsPart worksheetCommentsPart = worksheetPart.WorksheetCommentsPart ?? worksheetPart.AddNewPart<WorksheetCommentsPart>();

    // We only want one legacy drawing element per worksheet for comments
    if (worksheetPart.Worksheet.Descendants<LegacyDrawing>().SingleOrDefault() == null)
    {
        string vmlPartId = worksheetPart.GetIdOfPart(vmlDrawingPart);
        LegacyDrawing legacyDrawing = new LegacyDrawing() { Id = vmlPartId };
        worksheetPart.Worksheet.Append(legacyDrawing);
    }

    // Reuse the existing Comments root if the part already has one; otherwise create it
    // together with its single Authors element.
    Comments comments = worksheetCommentsPart.Comments;
    bool commentsCreated = comments == null;
    if (commentsCreated)
    {
        comments = new Comments();
        Authors authors = new Authors();
        Author author = new Author();
        author.Text = "Author Name";
        authors.Append(author);
        comments.Append(authors);
    }

    // Reuse the existing CommentList if there is one; otherwise create and attach it
    CommentList commentList = comments.Descendants<CommentList>().SingleOrDefault();
    if (commentList == null)
    {
        commentList = new CommentList();
        comments.Append(commentList);
    }

    // Only assign the Comments root if it did not exist before
    if (commentsCreated)
    {
        worksheetCommentsPart.Comments = comments;
    }

    // Create the comment elements
    foreach (var commentToAdd in commentsToAddDict)
    {
        // AuthorId 0 refers to the first entry of the Authors element
        Comment comment = new Comment() { Reference = commentToAdd.Key, AuthorId = (UInt32Value)0U };

        RunProperties runProperties = new RunProperties();
        runProperties.Append(new Bold());
        runProperties.Append(new FontSize() { Val = 8D });
        runProperties.Append(new Color() { Indexed = (UInt32Value)81U });
        runProperties.Append(new RunFont() { Val = "Tahoma" });
        runProperties.Append(new RunPropertyCharSet() { Val = 1 });

        Text text = new Text();
        text.Text = commentToAdd.Value;

        Run run = new Run();
        run.Append(runProperties);
        run.Append(text);

        CommentText commentTextElement = new CommentText();
        commentTextElement.Append(run);
        comment.Append(commentTextElement);
        commentList.Append(comment);
    }
}
Helper method that will create the VML XML for the Shape:
/// <summary>
/// Builds the VML shape markup for a single comment. The shape's anchor, row and column
/// are derived from the column name and row index of the commented cell.
/// </summary>
/// <param name="columnName">Column name containing the comment</param>
/// <param name="rowIndex">Row index containing the comment, as text</param>
/// <returns>VML Shape XML for the comment, or an empty string when the row index is not an integer</returns>
private static string GetCommentVMLShapeXML(string columnName, string rowIndex)
{
    int parsedRow;
    if (!int.TryParse(rowIndex, out parsedRow))
    {
        return string.Empty;
    }

    // The x:Row element is written one less than the row index passed in
    int zeroBasedRow = parsedRow - 1;
    string shapeId = Guid.NewGuid().ToString().Replace("-", "");
    string anchor = GetAnchorCoordinatesForVMLCommentShape(columnName, rowIndex);
    int? columnIndex = GetColumnIndexFromName(columnName);

    string shapeOpening = "<v:shape id=\"" + shapeId + "\" type=\"#_x0000_t202\" style=\'position:absolute;\r\n margin-left:59.25pt;margin-top:1.5pt;width:96pt;height:55.5pt;z-index:1;\r\n visibility:hidden\' fillcolor=\"#ffffe1\" o:insetmode=\"auto\">\r\n <v:fill color2=\"#ffffe1\"/>\r\n";
    string shapeDecoration = "<v:shadow on=\"t\" color=\"black\" obscured=\"t\"/>\r\n <v:path o:connecttype=\"none\"/>\r\n <v:textbox style=\'mso-fit-shape-to-text:true'>\r\n <div style=\'text-align:left\'></div>\r\n </v:textbox>\r\n <x:ClientData ObjectType=\"Note\">\r\n <x:MoveWithCells/>\r\n";
    string clientData = "<x:SizeWithCells/>\r\n <x:Anchor>\r\n" + anchor + "</x:Anchor>\r\n <x:AutoFill>False</x:AutoFill>\r\n <x:Row>" + zeroBasedRow + "</x:Row>\r\n <x:Column>" + columnIndex + "</x:Column>\r\n </x:ClientData>\r\n </v:shape>";

    return shapeOpening + shapeDecoration + clientData;
}
Helpers to figure out the Column Index and coordinates for the comment Shape:
/// <summary>
/// Computes the anchor values that place the VML comment shape relative to the commented cell.
/// </summary>
/// <param name="columnName">Column name of where the comment is located (e.g. B)</param>
/// <param name="rowIndex">Row index of where the comment is located (e.g. 2)</param>
/// <returns>
/// The eight x:Anchor values as a comma separated list, or an empty string when
/// <paramref name="rowIndex"/> is not an integer
/// </returns>
private static string GetAnchorCoordinatesForVMLCommentShape(string columnName, string rowIndex)
{
    int column = GetColumnIndexFromName(columnName).Value;

    int row;
    if (!int.TryParse(rowIndex, out row))
    {
        return string.Empty;
    }

    // Work with a zero based row index
    row -= 1;

    // The row offsets differ when the shape is defined in the first row
    bool isFirstRow = row == 0;

    // From (top-left corner of the rectangle):
    //   [0] column, [1] column offset, [2] row, [3] row offset
    // To (bottom-right corner of the rectangle):
    //   [4] column, [5] column offset, [6] row, [7] row offset
    int[] anchor =
    {
        column + 1,           // if the comment is in column A, the shape starts in column B
        15,
        row,
        isFirstRow ? 2 : 10,
        column + 3,           // ...and extends to column D
        15,
        row + 3,              // three rows below the starting row
        isFirstRow ? 16 : 4
    };

    return string.Join(",", Array.ConvertAll(anchor, value => value.ToString()));
}
/// <summary>
/// Given just the column name (no row index), returns the zero based column index
/// (A = 0, Z = 25, AA = 26, AB = 27, ...). Column names of any length are supported and
/// letters are matched case-insensitively.
/// </summary>
/// <param name="columnName">Column Name (e.g. A or AB)</param>
/// <returns>
/// Zero based index if the conversion was successful; otherwise null (null or empty input,
/// any character outside A-Z, or a name too long to fit in an int)
/// </returns>
public static int? GetColumnIndexFromName(string columnName)
{
    if (string.IsNullOrEmpty(columnName))
    {
        return null;
    }

    // Column names are a bijective base-26 number: each letter is a digit worth 1..26.
    int oneBasedIndex = 0;
    foreach (char character in columnName)
    {
        char letter = char.ToUpperInvariant(character);
        if (letter < 'A' || letter > 'Z')
        {
            return null;
        }

        // Refuse names so long that the next step would overflow an int
        if (oneBasedIndex > (int.MaxValue - 26) / 26)
        {
            return null;
        }

        oneBasedIndex = (oneBasedIndex * 26) + (letter - 'A' + 1);
    }

    return oneBasedIndex - 1;
}
Don't forget to save your worksheet and workbook once you are done in order to see the changes.

Many people ask "how to do this" / "how to do that" using OpenXML.
The most frequent answers say that OpenXML is painful to work with (I agree) and refer to a third-party library (specifically ClosedXML).
If you are not using a 3-rd party library then I would like to answer with a general tip based on this thread: http://social.msdn.microsoft.com/Forums/office/en-US/81f767d0-15ac-42fe-b122-6c5c02b6c373/cell-color-and-add-comment?forum=oxmlsdk
You can create an empty workbook named "Unchanged.xlsx", then make the customization that you want to reproduce in C# code and save the changed workbook as "Changed.xlsx". Now open the Open XML SDK tool and use its compare files feature; you can then see the changes made to the workbook and how they can be made with the Open XML SDK via C#.
There is an option reflect code that gives you a lot of hints about what is going on, see (incomplete) example below:
As it is autogenerated code it can be simplified / inlined / reduced. After a week or two you'll get into speed.

Related

wxGrid destructor Triggers Breakpoint on

I'm new to wxWidgets, although I've been able to get an application up and running fairly smoothly up until this point. For the main window, I'm using a wxGrid inside a wxPanel. Everything runs fine until I close the program.
Thanks in advance for any insight.
The grid is a member of a class derived from wxPanel:
///
/// Panel that hosts the data grid.
///
class FormDataView
: public wxPanel
{
public:
    /// Creates the panel as a child of the given parent window.
    FormDataView(wxWindow* parent);
    virtual ~FormDataView();
private:
    /// Grid created in the constructor with this panel as its parent.
    wxGrid* grid_;
}; // a class definition must be terminated with a semicolon
And created in the constructor. The data for the grid comes from another thread, so I create a custom event for actually writing the data.
// Custom event used to hand a "new data available" notification to onDataUpdate.
wxDEFINE_EVENT(FORMDATAVIEW_UPDATE, wxThreadEvent);
///
/// Creates the panel, an initially empty grid that fills it, and binds the
/// update event to onDataUpdate.
///
FormDataView::FormDataView(wxWindow* parent)
: wxPanel(parent,wxID_ANY )
{
    wxBoxSizer* mbox = new wxBoxSizer(wxVERTICAL);
    grid_ = new wxGrid(this, wxID_ANY );
    // Start with no rows and no columns; columns are added when the first data arrives.
    grid_->CreateGrid(0, 0);
    mbox->Add(grid_,wxSizerFlags(1).Expand());
    // Install the sizer on the panel. Without this the sizer is never used for
    // layout and is never deleted.
    SetSizer(mbox);
    Bind(FORMDATAVIEW_UPDATE, &FormDataView::onDataUpdate, this);
}
///
/// Called by a child thread when data is received. Stores the received
/// headers and data in the member buffers and queues an update event for
/// this panel.
/// NOTE(review): the member buffers are written here and read in
/// onDataUpdate with no locking visible in this snippet - confirm that the
/// two cannot run at the same time.
///
void
FormDataView::onDataReceived(IFORMATTERBASE_PFONDATARECEIVED_ARGS)
{
    // Keep a copy of what arrived so the update handler can read it later.
    newHeaders_ = headers;
    newData_ = data;

    // Queue the notification; the handler bound to FORMDATAVIEW_UPDATE picks it up.
    wxThreadEvent* updateEvent = new wxThreadEvent(FORMDATAVIEW_UPDATE);
    updateEvent->SetString("Yo.");
    wxQueueEvent(this, updateEvent);
}
///
/// Called by the event loop. This function puts the data
/// into the grid: on the first call it creates one column per header,
/// then it inserts the new data as a row at the top of the table.
///
void
FormDataView::onDataUpdate(wxThreadEvent& evt)
{
    FormatterStringList& headers = newHeaders_;
    FormatterStringList& data = newData_;
    if (grid_->GetNumberRows() <= 0)
    {
        for (size_t i = 0; i<headers.size(); ++i)
        {
            if (grid_->GetNumberCols() <= 0)
                grid_->InsertCols();
            else
                grid_->AppendCols();
            grid_->SetColLabelValue(i, headers[i].data());
            // Every column gets its own attribute object. Handing one shared
            // instance to several columns makes the grid release it more
            // than once.
            wxGridCellAttr* attr = new wxGridCellAttr();
            attr->SetReadOnly(true);
            attr->SetAlignment(wxALIGN_CENTRE, wxALIGN_CENTRE);
            grid_->SetColAttr(i, attr);
        }
    }
    // suspend redrawing while we add data.
    grid_->BeginBatch();
    // insert a new row at the top of the table
    grid_->InsertRows(
        0, // position
        1, // number of rows to insert
        true); // update labels (not currently used)
    for (size_t i = 0; i<headers.size(); ++i)
    {
        // data may hold fewer entries than there are headers; valid indices
        // are 0..size()-1, so i == size() must also take the empty branch.
        if (i >= data.size())
        {
            grid_->SetCellValue(0, i, "");
        }
        else
        {
            grid_->SetCellValue(0, i, data[i].data());
        }
    }
    // resume redrawing.
    grid_->EndBatch();
}
Everything runs fine, but when I close the program, I get the following message. I've indicated the line on which the breakpoint occurs. Is there some sort of sequence for clearing data out of the grid that I'm supposed to follow?
// Reports how the cell at (row, col) takes part in a cell span and writes the
// size obtained from the cell's attribute through num_rows / num_cols.
// The ">>>>>>>" marker below is the asker's annotation of where the
// breakpoint was hit; it is not part of the library source.
wxGrid::CellSpan
wxGrid::GetCellSize( int row, int col, int *num_rows, int *num_cols ) const
{
// Fetch the attribute object for this cell.
wxGridCellAttr *attr = GetCellAttr(row, col);
// The attribute fills in the values that are tested below.
attr->GetSize( num_rows, num_cols );
// Done with the attribute - presumably this releases the reference handed
// out by GetCellAttr (confirm against the wxGridCellAttr documentation).
attr->DecRef();
>>>>>>> if ( *num_rows == 1 && *num_cols == 1 )
return CellSpan_None; // just a normal cell
// A negative size marks a cell that lies inside another cell's span.
if ( *num_rows < 0 || *num_cols < 0 )
return CellSpan_Inside; // covered by a multi-span cell
// this cell spans multiple cells to its right/bottom
return CellSpan_Main;
}
The problem was with where I was creating the column attribute. I was re-using the same column attribute instance for every column, but each column needs to have its own instance.
BEFORE:
// Faulty version, kept for comparison: the columns are set up only while the
// grid has no rows yet.
if (grid_->GetNumberRows() <= 0)
{
///
/// NO! The columns will share the same cell attribute
/// instance.
///
// A single attribute object is allocated here, outside the loop ...
wxGridCellAttr* attr = new wxGridCellAttr();
attr->SetReadOnly(true);
attr->SetAlignment(wxALIGN_CENTRE, wxALIGN_CENTRE);
for (size_t i = 0; i<headers.size(); ++i)
{
// Insert the first column, append all later ones.
if (grid_->GetNumberCols() <= 0)
grid_->InsertCols();
else
grid_->AppendCols();
grid_->SetColLabelValue(i, headers[i].data());
// ... and the very same pointer is handed to every column here.
grid_->SetColAttr(i, attr);
}
}
CORRECT:
// Set the columns up only while the grid has no rows yet.
if (grid_->GetNumberRows() <= 0)
{
    for (size_t col = 0; col < headers.size(); ++col)
    {
        // Append when columns already exist, otherwise insert the first one.
        if (grid_->GetNumberCols() > 0)
            grid_->AppendCols();
        else
            grid_->InsertCols();
        grid_->SetColLabelValue(col, headers[col].data());

        ///
        /// A fresh cell attribute is allocated for every column, so no
        /// two columns share an instance. Supposedly, the column takes
        /// ownership of the instance passed to SetColAttr.
        ///
        wxGridCellAttr* columnAttr = new wxGridCellAttr();
        columnAttr->SetReadOnly(true);
        columnAttr->SetAlignment(wxALIGN_CENTRE, wxALIGN_CENTRE);
        grid_->SetColAttr(col, columnAttr);
    }
}

MS Chart multiple Y- Axes

Is there a way to create two Y axes for one chart?
It seems to be impossible to add more than 2 axes on the same graph.
Any clue?
Thanks a lot.
You should download the mschart samples from microsoft ... and look at
Chart features -> Axes -> Multiple Y axes
here the code extract from it
using System.Windows.Forms.DataVisualization.Charting;
...
if (!checkBoxUseMultipleYAxis.Checked)
{
    // Move the second and third series back into the default chart area
    Chart1.Series["Series2"].ChartArea = "Default";
    Chart1.Series["Series3"].ChartArea = "Default";

    // Remove every series beyond the first three and every chart area beyond the first
    while (Chart1.Series.Count > 3)
    {
        Chart1.Series.RemoveAt(3);
    }
    while (Chart1.ChartAreas.Count > 1)
    {
        Chart1.ChartAreas.RemoveAt(1);
    }

    // Let the default chart area position itself automatically again
    ChartArea defaultArea = Chart1.ChartAreas["Default"];
    defaultArea.Position.Auto = true;
    defaultArea.InnerPlotPosition.Auto = true;
}
else
{
    // Give the default chart area a custom position
    ChartArea defaultArea = Chart1.ChartAreas["Default"];
    defaultArea.Position = new ElementPosition(25, 10, 68, 85);
    defaultArea.InnerPlotPosition = new ElementPosition(10, 0, 90, 90);

    // Create an extra Y axis for the second and for the third series
    CreateYAxis(Chart1, defaultArea, Chart1.Series["Series2"], 13, 8);
    CreateYAxis(Chart1, defaultArea, Chart1.Series["Series3"], 22, 8);
}
...
/// <summary>
/// Adds an extra Y axis for the given series. The series is moved into a new transparent
/// chart area laid over the original one, and a second new chart area, holding an invisible
/// copy of the series, is shifted sideways so that only its Y axis shows.
/// </summary>
/// <param name="chart">Chart control.</param>
/// <param name="area">Original chart area.</param>
/// <param name="series">Series.</param>
/// <param name="axisOffset">New Y axis offset in relative coordinates.</param>
/// <param name="labelsSize">Extra space for new Y axis labels in relative coordinates.</param>
public void CreateYAxis(Chart chart, ChartArea area, Series series, float axisOffset, float labelsSize)
{
    // Transparent chart area that takes over the original series
    ChartArea seriesArea = chart.ChartAreas.Add("ChartArea_" + series.Name);
    seriesArea.BackColor = Color.Transparent;
    seriesArea.BorderColor = Color.Transparent;
    seriesArea.Position.FromRectangleF(area.Position.ToRectangleF());
    seriesArea.InnerPlotPosition.FromRectangleF(area.InnerPlotPosition.ToRectangleF());

    // Grid lines, tick marks and labels are switched off on both of its axes
    Axis seriesAxisX = seriesArea.AxisX;
    seriesAxisX.MajorGrid.Enabled = false;
    seriesAxisX.MajorTickMark.Enabled = false;
    seriesAxisX.LabelStyle.Enabled = false;
    Axis seriesAxisY = seriesArea.AxisY;
    seriesAxisY.MajorGrid.Enabled = false;
    seriesAxisY.MajorTickMark.Enabled = false;
    seriesAxisY.LabelStyle.Enabled = false;
    seriesAxisY.IsStartedFromZero = area.AxisY.IsStartedFromZero;
    series.ChartArea = seriesArea.Name;

    // Second chart area whose only job is to show the additional Y axis
    ChartArea axisArea = chart.ChartAreas.Add("AxisY_" + series.ChartArea);
    axisArea.BackColor = Color.Transparent;
    axisArea.BorderColor = Color.Transparent;
    ChartArea currentSeriesArea = chart.ChartAreas[series.ChartArea];
    axisArea.Position.FromRectangleF(currentSeriesArea.Position.ToRectangleF());
    axisArea.InnerPlotPosition.FromRectangleF(currentSeriesArea.InnerPlotPosition.ToRectangleF());

    // Copy the X value and first Y value of every point into a new series
    Series hiddenCopy = chart.Series.Add(series.Name + "_Copy");
    hiddenCopy.ChartType = series.ChartType;
    foreach (DataPoint sourcePoint in series.Points)
    {
        hiddenCopy.Points.AddXY(sourcePoint.XValue, sourcePoint.YValues[0]);
    }

    // Make the copy invisible and place it in the axis area
    hiddenCopy.IsVisibleInLegend = false;
    hiddenCopy.Color = Color.Transparent;
    hiddenCopy.BorderColor = Color.Transparent;
    hiddenCopy.ChartArea = axisArea.Name;

    // Disable grid lines & tickmarks
    Axis axisAreaX = axisArea.AxisX;
    axisAreaX.LineWidth = 0;
    axisAreaX.MajorGrid.Enabled = false;
    axisAreaX.MajorTickMark.Enabled = false;
    axisAreaX.LabelStyle.Enabled = false;
    axisArea.AxisY.MajorGrid.Enabled = false;
    axisArea.AxisY.IsStartedFromZero = area.AxisY.IsStartedFromZero;

    // Shift the axis area by the offset and leave room for its labels
    axisArea.Position.X -= axisOffset;
    axisArea.InnerPlotPosition.X += labelsSize;
}
...

How to Detect table start in itextSharp?

I am trying to convert pdf to csv file. pdf file has data in tabular format with first row as header. I have reached to the level where I can extract text from a cell, compare the baseline of text in table and detect newline but I need to compare table borders to detect start of table. I do not know how to detect and compare lines in PDF. Can anyone help me?
Thanks!!!
As you've seen (hopefully), PDFs have no concept of tables, just text placed at specific locations and lines drawn around them. There is no internal relationship between the text and the lines. This is very important to understand.
Knowing this, if all of the cells have enough padding you can look for gaps between characters that are large enough such as the width of 3 or more spaces. If the cells don't have enough spacing this will unfortunately probably break.
You could also look at every line in the PDF and try to figure out what represents your "table-like" lines. See this answer for how to walk every token on a page to see what's being drawn.
I was also searching for an answer to a similar question, but unfortunately I didn't find one, so I did it on my own.
A PDF page like this
Will give the output as
Here is the github link for the dotnet Console Application I made.
https://github.com/Justabhi96/Detect_And_Extract_Table_From_Pdf
This application detects the table in the specific page of the PDF and prints them in a table format on the console.
Here is the code that i used to make this application.
First of all I took the text out of PDF along with their coordinates using a class which extends iTextSharp.text.pdf.parser.LocationTextExtractionStrategy class of iTextSharp. The Code is as follows:
This is the class that is going to store the chunks with their coordinates and text.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
namespace itextPdfTextCoordinates
{
    /// <summary>
    /// Pairs a piece of text with the rectangle it was stored with.
    /// </summary>
    public class RectAndText
    {
        /// <summary>Rectangle passed to the constructor.</summary>
        public iTextSharp.text.Rectangle Rect;

        /// <summary>Text passed to the constructor.</summary>
        public string Text;

        /// <summary>
        /// Stores the given rectangle and text unchanged.
        /// </summary>
        /// <param name="rect">Rectangle to store</param>
        /// <param name="text">Text to store</param>
        public RectAndText(iTextSharp.text.Rectangle rect, string text)
        {
            Rect = rect;
            Text = text;
        }
    }
}
And this is the class that extends the LocationTextExtractionStrategy class.
using iTextSharp.text.pdf.parser;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
namespace itextPdfTextCoordinates
{
    /// <summary>
    /// Text extraction strategy that, in addition to the base behaviour, records every
    /// rendered chunk of text together with a rectangle built from its descent and ascent lines.
    /// </summary>
    public class MyLocationTextExtractionStrategy : LocationTextExtractionStrategy
    {
        /// <summary>Chunks collected so far, in the order RenderText was called.</summary>
        public List<RectAndText> myPoints = new List<RectAndText>();

        /// <summary>
        /// Called for each chunk of text in the PDF; forwards to the base class and then
        /// records the chunk.
        /// </summary>
        /// <param name="renderInfo">Render information for the chunk</param>
        public override void RenderText(TextRenderInfo renderInfo)
        {
            base.RenderText(renderInfo);

            // Start of the descent line and end of the ascent line are taken as
            // opposite corners of the chunk's box
            Vector lowerLeft = renderInfo.GetDescentLine().GetStartPoint();
            Vector upperRight = renderInfo.GetAscentLine().GetEndPoint();

            iTextSharp.text.Rectangle chunkBox = new iTextSharp.text.Rectangle(
                lowerLeft[Vector.I1],
                lowerLeft[Vector.I2],
                upperRight[Vector.I1],
                upperRight[Vector.I2]);

            // Remember the box together with the chunk's text
            myPoints.Add(new RectAndText(chunkBox, renderInfo.GetText()));
        }
    }
}
This class is overriding the RenderText method of the LocationTextExtractionStrategy class which will be called each time you extract the chunks from a PDF page using PdfTextExtractor.GetTextFromPage() method.
using itextPdfTextCoordinates;
using iTextSharp.text.pdf;
//Create an instance of our strategy
var t = new MyLocationTextExtractionStrategy();
var path = "F:\\sample-data.pdf";
//Parse every page of the document above
using (var r = new PdfReader(path))
{
    for (var i = 1; i <= r.NumberOfPages; i++)
    {
        // Calling this function adds all the chunks with their coordinates to the
        // 'myPoints' variable of 'MyLocationTextExtractionStrategy' Class.
        // The returned page text is not needed here.
        iTextSharp.text.pdf.parser.PdfTextExtractor.GetTextFromPage(r, i, t);
    }
}
//Here you can loop over the chunks of PDF
foreach (var chunk in t.myPoints)
{
    // The loop variable needs a declared type, and the body must use that variable
    // ('i' from the page loop above is out of scope here)
    Console.WriteLine("character {0} is at {1}*{2}", chunk.Text, chunk.Rect.Left, chunk.Rect.Top);
}
Now for Detecting the start and end of the table you can use the coordinates of the chunks extracted from the PDF.
If a specific line does not contain a table, there will be no jump between the right coordinate of the current chunk and the left coordinate of the next chunk. But lines containing a table will have coordinate jumps of at least 3 points.
Like for Lines having table will have coordinates of chunks something like this:
right coord of current chunk -> 12.75pts
left coords of next chunk -> 20.30pts
so further you can use this logic to detect tables in the PDF.
The code is as follows:
using itextPdfTextCoordinates;
using iTextSharp.text.pdf;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace ConsoleApp1
{
/// <summary>
/// Groups the text chunks of one PDF page into lines and splits each line into columns
/// wherever neighbouring chunks are separated by a horizontal gap.
/// </summary>
class LineUsingCoordinates
{
/// <summary>
/// Extracts the text of one page as a list of rows, each row being a list of column strings.
/// </summary>
/// <param name="path">Path of the PDF file</param>
/// <param name="page">Page number handed to PdfTextExtractor.GetTextFromPage</param>
/// <param name="coord">
/// Optional limits {left, bottom, right, top}; when not null only chunks lying inside them
/// are used to find the lines. Pass null to process the whole page.
/// </param>
/// <returns>One list of column strings per detected line</returns>
public static List<List<string>> getLineText(string path, int page, float[] coord)
{
//Create an instance of our strategy
var t = new MyLocationTextExtractionStrategy();
//Parse the requested page of the document
using (var r = new PdfReader(path))
{
// Calling this function adds all the chunks with their coordinates to the
// 'myPoints' variable of 'MyLocationTextExtractionStrategy' Class
var ex = iTextSharp.text.pdf.parser.PdfTextExtractor.GetTextFromPage(r, page, t);
}
// List of columns in one line
List<string> lineWord = new List<string>();
// temporary list used to copy a row before it is appended to the List<List<string>>
List<string> tempWord;
// List of rows. rows are list of string
List<List<string>> lineText = new List<List<string>>();
// List holding, for each line, the list of chunks that belong to it
List<List<RectAndText>> lineChunksList = new List<List<RectAndText>>();
// List holding the chunks of the line currently being collected
List<RectAndText> chunksList;
// List of the bottom coordinates of the lines present in the page
List<float> bottomPointList = new List<float>();
// Collect the bottom coordinates of the lines in the page, whether they belong to a table or not
foreach (var i in t.myPoints)
{
// Diagnostic output: every chunk is printed to the console
Console.WriteLine("character {0} is at {1}*{2}", i.Text, i.Rect.Left, i.Rect.Top);
// If the coords passed to the function is not null then process the part in the
// given coords of the page otherwise process the whole page
if (coord != null)
{
if (i.Rect.Left >= coord[0] &&
i.Rect.Bottom >= coord[1] &&
i.Rect.Right <= coord[2] &&
i.Rect.Top <= coord[3])
{
float bottom = i.Rect.Bottom;
if (bottomPointList.Count == 0)
{
bottomPointList.Add(bottom);
}
// Record a new bottom only when it differs from the last recorded one by more than 3
else if (Math.Abs(bottomPointList.Last() - bottom) > 3)
{
bottomPointList.Add(bottom);
}
}
}
// else process the whole page
else
{
float bottom = i.Rect.Bottom;
if (bottomPointList.Count == 0)
{
bottomPointList.Add(bottom);
}
// Same rule as above: only a change of more than 3 starts a new entry
else if (Math.Abs(bottomPointList.Last() - bottom) > 3)
{
bottomPointList.Add(bottom);
}
}
}
// Sometimes the above list contains elements which are from the same line but
// have different coordinates due to some characters like " ",".",etc.
// These coordinates differ by less than 4 points in their bottom coordinates.
// To remove those elements we build two new lists of values to remove from the original list.
// This list holds the elements whose coordinates differ, but only slightly
List<float> removeList = new List<float>();
// This list holds the elements which have exactly the same coordinates
List<float> sameList = new List<float>();
// Here we are adding the elements in those two lists to remove the elements
// from the original list later
for (var i = 0; i < bottomPointList.Count; i++)
{
var basePoint = bottomPointList[i];
for (var j = i+1; j < bottomPointList.Count; j++)
{
var comparePoint = bottomPointList[j];
//here we are getting the elements with same coordinates
if (Math.Abs(comparePoint - basePoint) == 0)
{
sameList.Add(comparePoint);
}
// here we are getting the elements which are different but differ
// by less than 4 points
else if (Math.Abs(comparePoint - basePoint) < 4)
{
removeList.Add(comparePoint);
}
}
}
// Here we are removing the matching elements of remove list from the original list
bottomPointList = bottomPointList.Where(item => !removeList.Contains(item)).ToList();
//Here we are removing the first matching element of same list from the original list
foreach (var r in sameList)
{
bottomPointList.Remove(r);
}
// Here we are getting the characters of the same line in a List 'chunkList'.
foreach (var bottomPoint in bottomPointList)
{
chunksList = new List<RectAndText>();
for (int i = 0; i < t.myPoints.Count; i++)
{
// If the character is having same bottom coord then add it to chunkList
if (bottomPoint == t.myPoints[i].Rect.Bottom)
{
chunksList.Add(t.myPoints[i]);
}
// If character is having a difference of less than 3 in the bottom coord then also
// add it to chunkList because the coord of the next line will differ at least 10 points
// from the coord of current line
else if (Math.Abs(t.myPoints[i].Rect.Bottom - bottomPoint) < 3)
{
chunksList.Add(t.myPoints[i]);
}
}
// Here we are adding the chunkList related to each line
lineChunksList.Add(chunksList);
}
// NOTE(review): sameLine is never read in this method
bool sameLine = false;
//Here we are looping through the lines consisting the chunks related to each line
foreach(var linechunk in lineChunksList)
{
var text = "";
// Here we are looping through the chunks of the specific line to put the texts
// that are having a coord jump in their left coordinates.
// because only the line having table will be having the coord jumps in their
// left coord not the line having texts
// The loop compares chunk i with chunk i + 1, so it stops one before the end; a line
// with exactly one chunk never enters the loop and produces an empty row
for (var i = 0; i< linechunk.Count-1; i++)
{
// If the coord is having a jump of less than 3 points then it will be in the same
// column otherwise the next chunk belongs to different column
if (Math.Abs(linechunk[i].Rect.Right - linechunk[i + 1].Rect.Left) < 3)
{
// Last pair of the line: take both chunks, since there is no later iteration
if (i == linechunk.Count - 2)
{
text += linechunk[i].Text + linechunk[i+1].Text ;
}
else
{
text += linechunk[i].Text;
}
}
else
{
// Last pair of the line: close the current column and add the final chunk as its own column
if (i == linechunk.Count - 2)
{
// add the text to the column and set the value of next column to ""
text += linechunk[i].Text;
// this is the list of columns in other word its the row
lineWord.Add(text);
text = "";
text += linechunk[i + 1].Text;
lineWord.Add(text);
text = "";
}
else
{
text += linechunk[i].Text;
lineWord.Add(text);
text = "";
}
}
}
// Whatever is still pending becomes the last column, unless it is only whitespace
if(text.Trim() != "")
{
lineWord.Add(text);
}
// creating a temporary list of strings for the List<List<string>> manipulation
tempWord = new List<string>();
tempWord.AddRange(lineWord);
// "lineText" is the type of List<List<string>>
// this is our list of rows. and rows are List of strings
// here we are adding the row to the list of rows
lineText.Add(tempWord);
// lineWord is reused for the next line, which is why a copy was stored above
lineWord.Clear();
}
return lineText;
}
}
}
You can call getLineText() method of the above class and run the following loop to see the output in the table structure on the console.
var testFile = "F:\\sample-data.pdf";
// Limits in the order {LowerLeftX, LowerLeftY, UpperRightX, UpperRightY}
float[] limitCoordinates = { 52, 671, 357, 728 };

// getLineText returns the rows, each consisting of one or more columns.
// Passing null as the third argument returns the content of the whole page;
// passing the coordinates returns the content inside those limits only:
//var lineText = LineUsingCoordinates.getLineText(testFile, 1, limitCoordinates);
var lineText = LineUsingCoordinates.getLineText(testFile, 1, null);

// Table detection relies on the column count: a row with fewer than two columns is
// treated as ordinary text, a row with two or more columns as part of a table.
foreach (var row in lineText)
{
    if (row.Count <= 1)
    {
        continue;
    }

    foreach (var cell in row)
    {
        string trimmedValue = cell.Trim();
        if (trimmedValue.Length != 0)
        {
            Console.Write("|" + trimmedValue + "|");
        }
    }
    Console.WriteLine("");
}
Console.ReadLine();

StyledDocument adding extra count to indexof for each line of file

I have a strange problem (at least it appears that way): when searching for a string in a textPane, I get an extra index for each line that is searched and returned when using a StyledDocument versus just getting the text from a textPane. I get the same text from the same pane; it's just that one is from the plain text and the other is from the styled doc. Am I missing something here? I'll try to list as many of the changes between the two versions I am working with as I can.
The plain text version:
/**
 * Reads the file at {@code path} as UTF-8 and shows its content in the first
 * display pane. Nothing is loaded when {@code target} is not 1.
 *
 * @param path   path of the file to display
 * @param target pane selector; only 1 is handled here
 * @return -1 if reading or displaying the file failed, 0 otherwise
 */
public int displayXMLFile(String path, int target){
    if (target != 1) {
        return 0;
    }
    // try-with-resources closes the reader and the underlying stream even
    // when reading fails part way through.
    try (FileInputStream fileStream = new FileInputStream(new File(path));
         BufferedReader buffReader = new BufferedReader(
                 new InputStreamReader(fileStream, "UTF-8"))) {
        StringBuilder content = new StringBuilder();
        String line;
        while ((line = buffReader.readLine()) != null) {
            // readLine strips the line terminator, so every line is stored
            // with a single '\n'
            content.append(line).append("\n");
        }
        xhw.txtDisplay_1.setText(content.toString());
    }
    catch (Exception e) {
        e.printStackTrace();
        return -1;
    }
    // The method is declared to return int, so the success path needs a value too.
    return 0;
}
versus the Styled Doc (without the styles applied)
protected void openFile(String path, StyledDocument sDoc, int target)
throws BadLocationException {
FileInputStream fileStream;
String file;
if(target == 1){
file = "Openning First File";
} else {
file = "Openning Second File";
}
try {
fileStream = new FileInputStream(path);
// Get the object of DataInputStream
//DataInputStream in = new DataInputStream(fileStream);
ProgressMonitorInputStream in = new ProgressMonitorInputStream(
xw.getContentPane(), file, fileStream);
BufferedReader br = new BufferedReader(new InputStreamReader(in));
String strLine;
//Read File Line By Line
while ((strLine = br.readLine()) != null) {
sDoc.insertString(sDoc.getLength(), strLine + "\n", sDoc.getStyle("regular"));
xw.updateProgress(target);
}
//Close the input stream
in.close();
} catch (Exception e){//Catch exception if any
System.err.println("Error: " + e.getMessage());
}
This is how I search:
/**
 * Finds the next occurrence of the search-field text in the selected pane, starting at
 * {@code sPos} and wrapping around to the start of the document once, then highlights it.
 *
 * @param sPos   document offset at which to start searching
 * @param target 1 searches the first display pane, anything else the second
 * @return the document offset just past the match (where the next search should start),
 *         or {@code sPos} unchanged when nothing was searched; when nothing was found the
 *         value is 0 if the wrap-around search ran, otherwise {@code sPos}
 */
public int searchText(int sPos, int target) throws BadLocationException{
    String search = xhw.textSearch.getText();
    JTextPane searchPane;
    if(target == 1){
        searchPane = xhw.txtDisplay_1;
    } else {
        searchPane = xhw.txtDisplay_2;
    }
    if(search.isEmpty()){
        xhw.displayDialog("Nothing to search for");
        highlight(searchPane, null, 0,0);
    } else {
        // Search the document model rather than searchPane.getText(): getText() can
        // contain "\r\n" line ends (e.g. on Windows) while the document stores only "\n",
        // so getText() offsets drift by one character per preceding line.
        String contents = searchPane.getDocument().getText(0, searchPane.getDocument().getLength());
        // Search for the desired string starting at cursor position
        int newPos = contents.indexOf( search, sPos );
        // cycle cursor to beginning of doc window
        if (newPos == -1 && sPos > 0){
            sPos = 0;
            newPos = contents.indexOf( search, sPos );
        }
        if ( newPos >= 0 ) {
            // Select occurrence if found
            highlight(searchPane, contents, newPos, target);
            // Resume right after this match; skipping one extra character would miss a
            // match that starts immediately after it (e.g. "ab" in "abab").
            sPos = newPos + search.length();
        } else {
            xhw.displayDialog("\"" + search + "\"" + " was not found in File " + target);
        }
    }
    return sPos;
}
The sample file:
<?xml version="1.0" encoding="UTF-8"?>
<AlternateDepartureRoutes>
<AlternateDepartureRoute>
<AdrName>BOIRR</AdrName>
<AdrRouteAlpha>..BROPH..</AdrRouteAlpha>
<TransitionFix>
<FixName>BROPH</FixName>
</TransitionFix>
</AlternateDepartureRoute>
<AlternateDepartureRoute>
</AlternateDepartureRoutes>
And my highlighter:
/**
 * Highlights the first occurrence of the search-field text at or after {@code position}
 * in {@code tPane}, moves the caret to the end of the match and shows the match's line
 * number in the line-number field that belongs to {@code target}.
 *
 * A fresh highlighter is installed on every call, which discards earlier highlights.
 *
 * @param tPane    pane to highlight in
 * @param text     the string the caller searched to obtain {@code position}; may be null,
 *                 in which case {@code position} is taken to be a document offset
 * @param position offset into {@code text} at which to start looking
 * @param target   1 or 2 selects the line-number field to update; any other value clears both
 */
public void highlight(JTextPane tPane, String text, int position, int target) throws BadLocationException {
    Highlighter highlighter = new DefaultHighlighter();
    Highlighter.HighlightPainter painter = new DefaultHighlighter.DefaultHighlightPainter(Color.LIGHT_GRAY);
    tPane.setHighlighter(highlighter);
    String searchText = xhw.textSearch.getText();

    // Highlighter and caret positions are document-model offsets, and the model stores
    // line ends as a single "\n". If the caller searched a string with "\r\n" line ends
    // (JTextPane.getText() can return that, e.g. on Windows), its offsets run ahead by
    // one per preceding line, so translate 'position' back to a model offset first.
    int modelPosition = position;
    if(text != null){
        int limit = Math.min(position, text.length());
        for(int i = 0; i < limit && i + 1 < text.length(); i++){
            if(text.charAt(i) == '\r' && text.charAt(i + 1) == '\n'){
                modelPosition--;
            }
        }
    }

    // Search the model text itself so the offsets found match what the highlighter expects.
    String document = tPane.getDocument().getText(0, tPane.getDocument().getLength());
    int startOfSString = document.indexOf(searchText, modelPosition);
    if(startOfSString >= 0){
        int endOfSString = startOfSString + searchText.length();
        highlighter.addHighlight(startOfSString, endOfSString, painter);
        tPane.setCaretPosition(endOfSString);
        int caretPos = tPane.getCaretPosition();
        // Each child of the default root element is one line; +1 makes the number 1-based.
        javax.swing.text.Element root = tPane.getDocument().getDefaultRootElement();
        int lineNum = root.getElementIndex(caretPos) +1;
        if (target == 1){
            xhw.txtLineNum1.setText(Integer.toString(lineNum));
        } else if (target == 2){
            xhw.txtLineNum2.setText(Integer.toString(lineNum));
        } else {
            xhw.txtLineNum1.setText(null);
            xhw.txtLineNum2.setText(null);
        }
    } else {
        highlighter.removeAllHighlights();
    }
}
When I do a search for Alt with indexOf(), I get 40 for the plain text (which is what it should return) and 41 when searching with the styled doc. For each additional line that Alt appears on I get an extra index (so that the indexOf() call returns 2 more than needed in line 3). This happens for every additional line that it finds. Am I missing something obvious? (If I need to push this into a smaller single class to make it easier to check, I can do this later when I have some more time.)
Thanks in advance...
If you are on Windows, then the TextComponent text (searchPane.getText()) can contain carriage-return+newline characters (\r\n), but the TextComponent's Styled Document (sSearchPane.getText(0, sSearchPane.getLength())) contains only newline characters (\n). That's why your newPos is always larger than newPosS by the number of newlines at that point. To fix this, in your search function you can change:
contents = searchPane.getText();
to:
contents = searchPane.getText().replaceAll("\r\n","\n");
That way the search occurs with the same indices that the Styled Document is using.
OK, I have found a solution (basically). I approached this from the aspect that I am getting text from the same text component in two different ways...
// Fetch the same pane's text in two ways and search both, to compare the offsets found.
String search = xw.textSearch.getText();
// contents: text as returned by the text pane itself
String contents;
// contentsS: text as returned by the pane's styled document
String contentsS;
JTextPane searchPane;
StyledDocument sSearchPane;
searchPane = xw.txtDisplay_left;
sSearchPane = xw.txtDisplay_left.getStyledDocument();
contents = searchPane.getText();
contentsS = sSearchPane.getText(0, sSearchPane.getLength());
// Search for the desired string starting at cursor position
// newPos: offset of the match within the pane text
int newPos = contents.indexOf( search, sPos );
// newPosS: offset of the match within the styled-document text
int newPosS = contentsS.indexOf(search, sPos);
So when comparing the two variables "newPos" and "newPosS", newPos returned 1 more than newPosS for each line up to the one the search string was found on. So when looking at the sample file and searching for "Alt", the first instance is found on line 2: "newPos" returns 41 and "newPosS" returns 40 (which then highlights the correct text). For the next occurrence (which is found in line 3), "newPos" returns 71 and "newPosS" returns 69. As you can see, the difference grows with the line number the occurrence begins in. I would suspect that there is an extra character being added for each new line in the text from the textPane that is not present in the StyledDoc.
I'm sure there is a reasonable explanation, but I don't have it at this time.

I need an algorithm that can fit n rectangles of any size in a larger one minimizing its area

I need an algorithm that would take n rectangles of any sizes, and calculate a rectangle big enough to fit them all, minimizing its area so the wasted area is minimum, and also returning the position of all the smaller rectangles within.
The specific task I need this to implement on is in a sprite sheet compiler that would take individual PNG files and make a large PNG with all the images in it, so individual frames can be blitted from this surface at run time.
A nice to have feature would be that it aims to a specific given width/height ratio, but it's not mandatory.
I'd prefer simple, generic code I can port to another language.
This is what I put together for my own needs. The T parameter is whatever object you want associated with the results (think of it like the Tag property). It takes a list of sizes and returns a list of Rects that are arranged to best fit the desired rectangle shape:
static class LayoutHelper
{
    /// <summary>
    /// Determines the best fit of a List of Sizes, into the desired rectangle shape.
    /// Several container heights are tried in parallel; for each one the rectangles are
    /// packed tallest-first, and the layout whose width/height ratio is closest to the
    /// desired ratio is returned.
    /// </summary>
    /// <typeparam name="T">Holder for an associated object (e.g., window, UserControl, etc.)</typeparam>
    /// <param name="desiredWidthToHeightRatio">the target rectangle shape</param>
    /// <param name="rectsToArrange">List of sizes that have to fit in the rectangle</param>
    /// <param name="lossiness">1 = non-lossy (slow). Greater numbers improve speed, but miss some best fits.
    /// Values below 1 are treated as 1.</param>
    /// <returns>list of arranged rects, one per input size; empty when the input is empty</returns>
    /// <exception cref="ArgumentNullException"><paramref name="rectsToArrange"/> is null</exception>
    public static List<Tuple<T, Rect>> BestFitRects<T>(double desiredWidthToHeightRatio,
        List<Tuple<Size, T>> rectsToArrange, int lossiness = 10)
    {
        if (rectsToArrange == null)
        {
            throw new ArgumentNullException("rectsToArrange");
        }

        if (rectsToArrange.Count == 0)
        {
            // nothing to arrange (Max/Sum below would otherwise throw on an empty list)
            return new List<Tuple<T, Rect>>();
        }

        // a step below 1 would divide by zero (0) or produce no candidate heights (negative)
        var heightStep = Math.Max(1, lossiness);
        var sync = new object();

        // the outer boundary that limits how high the rectangles can stack vertically;
        // rounded up so the container is never shorter than the tallest rectangle
        var containingRectHeight = Convert.ToInt32(Math.Ceiling(rectsToArrange.Max(a => a.Item1.Height)));

        // always try packing using the tallest rectangle first, working down in height
        var largestToSmallest = rectsToArrange.OrderByDescending(a => a.Item1.Height).ToList();

        // find the maximum possible container height needed
        var totalHeight = Convert.ToInt32(Math.Ceiling(rectsToArrange.Sum(a => a.Item1.Height)));

        // Always evaluate at least one container height. Without the clamp, small inputs
        // (height range smaller than the step) ran zero iterations and only the first
        // rectangle was returned.
        var candidateCount = Math.Max(1, ((totalHeight + 1) - containingRectHeight) / heightStep);

        List<Tuple<T, Rect>> bestResults = null;

        // used to find the best packing arrangement that approximates the target container dimensions ratio
        var bestResultsProximityToDesiredRatio = double.MaxValue;

        // try all arrangements for all suitable container sizes
        Parallel.For(0, candidateCount, currentHeight =>
        {
            double containerHeight = containingRectHeight + (currentHeight * heightStep);
            var localResults = PackIntoContainer(largestToSmallest, containerHeight);

            // layout is complete, now see if this layout is the best one found so far
            var layoutHeight = localResults.Max(a => a.Item2.Y + a.Item2.Height);
            var layoutWidth = localResults.Max(a => a.Item2.X + a.Item2.Width);
            var widthMatchingDesiredRatio = desiredWidthToHeightRatio * layoutHeight;

            // >= 1; the closer to 1, the closer the layout is to the desired shape
            double ratioProximity = layoutWidth < widthMatchingDesiredRatio
                ? widthMatchingDesiredRatio / layoutWidth
                : layoutWidth / widthMatchingDesiredRatio;

            lock (sync)
            {
                // the first finished layout is always accepted, so a degenerate ratio
                // (infinite or NaN proximity) still yields a complete arrangement
                if (bestResults == null || ratioProximity < bestResultsProximityToDesiredRatio)
                {
                    // this layout is the best approximation of the desired container dimensions, so far
                    bestResults = localResults;
                    bestResultsProximityToDesiredRatio = ratioProximity;
                }
            }
        });

        return bestResults;
    }

    // Packs the sizes, in the order given, into a container of the given height and
    // returns each associated object paired with its positioned rectangle.
    private static List<Tuple<T, Rect>> PackIntoContainer<T>(List<Tuple<Size, T>> largestToSmallest,
        double containerHeight)
    {
        var potentialDropPoints = new SortedDictionary<Point, object>(Comparer<Point>.Create((p1, p2) =>
        {
            // choose the leftmost, then highest point as earlier in the sort order
            if (p1.X != p2.X)
            {
                return p1.X.CompareTo(p2.X);
            }

            return p1.Y.CompareTo(p2.Y);
        }));
        var results = new List<Tuple<T, Rect>>(largestToSmallest.Count);

        // kept alongside the results so each fit test does not rebuild this list
        var placedRects = new List<Rect>(largestToSmallest.Count);

        // iterate through the rectangles from largest to smallest
        foreach (var currentSize in largestToSmallest)
        {
            Point? usedDropPoint = null;
            Rect? fit = null;

            // take the first drop point (in sort order) where the rectangle fits;
            // the dictionary is only modified after this enumeration has ended
            foreach (var dropPoint in potentialDropPoints.Keys)
            {
                fit = FindHighestFit(dropPoint, currentSize.Item1, placedRects, containerHeight);
                if (fit.HasValue)
                {
                    usedDropPoint = dropPoint;
                    break;
                }
            }

            Rect newRect;
            if (fit.HasValue)
            {
                // found the sweet spot for this rect
                newRect = fit.Value;
                potentialDropPoints.Remove(usedDropPoint.Value);
            }
            else
            {
                // no drop point fits; expected only for the first rectangle, when there are none yet
                newRect = new Rect(0, 0, currentSize.Item1.Width, currentSize.Item1.Height);
            }

            results.Add(Tuple.Create(currentSize.Item2, newRect));
            placedRects.Add(newRect);
            AddNewPotentialDropPoints(potentialDropPoints, newRect);
        }

        return results;
    }

    // Tries to place a rectangle of the given size at the drop point and then slides it
    // upward (towards Y = 0) as far as it still fits. Returns null when it does not fit
    // at the drop point itself.
    private static Rect? FindHighestFit(Point dropPoint, Size size, List<Rect> placedRects,
        double containerHeight)
    {
        var workingPoint = dropPoint;
        Rect? lastFittingRect = null;
        var lowY = workingPoint.Y;
        var highY = workingPoint.Y - 1;
        var boundaryFound = false;

        do
        {
            // create a positioned rectangle out of the size dimensions
            var workingRect = new Rect(workingPoint,
                new Point(workingPoint.X + size.Width, workingPoint.Y + size.Height));

            // keep moving it up in binary search fashion until it bumps the higher rect
            if (!CollidesOrOverflows(workingRect, placedRects, containerHeight))
            {
                lastFittingRect = workingRect;
                if (!boundaryFound)
                {
                    // still expanding: double the search distance, stopping at the top edge
                    highY = Math.Max(lowY - ((lowY - highY) * 2), 0);
                    if (highY == 0)
                    {
                        boundaryFound = true;
                    }
                }
                else
                {
                    lowY = workingPoint.Y;
                }
            }
            else
            {
                boundaryFound = true;
                highY = workingPoint.Y;
            }

            workingPoint = new Point(workingPoint.X, lowY - (lowY - highY) / 2);
        } while (lowY - highY > 1);

        return lastFittingRect;
    }

    // True when the rectangle extends below the container height or touches/overlaps any
    // already placed rectangle (shared edges count as an intersection).
    private static bool CollidesOrOverflows(Rect one, List<Rect> placedRects, double containerHeight)
    {
        if (one.Y + one.Height > containerHeight)
        {
            return true;
        }

        return placedRects.Any(two =>
            !(one.Top > two.Bottom ||
              one.Bottom < two.Top ||
              one.Left > two.Right ||
              one.Right < two.Left));
    }

    // Registers the two candidate positions opened up by a newly placed rectangle.
    private static void AddNewPotentialDropPoints(SortedDictionary<Point, object> potentialDropPoints,
        Rect newRect)
    {
        // Only two locations make sense for placing a new rectangle, underneath the
        // bottom left corner or to the right of a top right corner. The +1 gap is needed
        // because touching edges are treated as an intersection.
        potentialDropPoints[new Point(newRect.X + newRect.Width + 1, newRect.Y)] = null;
        potentialDropPoints[new Point(newRect.X, newRect.Y + newRect.Height + 1)] = null;
    }
}