Are there any tools or ways to fix a bit random order of elements in PDF without OCR?
Using pdf2json and JS below, I am able to build web page with inputs and arrow navigation (easily readable by any screen reader), but still far from perfect.
var elList = [];
var uqIdx = {x:[],y:[]};
function show(json) {
var fnt = json[0].fonts;
var fntI = {};
for (var f in fnt) {
fntI[fnt[f].fontspec] = fnt[f];
}
var txt = json[0].text;
var cp = [];
for (var t in txt) {
if (txt[t].data !== ' ') {
cp.push(txt[t]);
}
}
var prev;
for (var t in cp) {
if (prev) {
if (Math.abs(prev.top - cp[t].top) < prev.height / 2) {
var x = prev.left + prev.width; var x2 = cp[t].left;
if (x < x2 && x2 - x > 5 || x2 - x < -5) {
if ((x2 - x) / 2 > prev.data.length / 10 || prev.data.substr(-1) !== ' ') {
prev = cp[t];
continue;
}
}
prev.data += cp[t].data;
prev.width += cp[t].width;
delete cp[t];
continue;
}
}
prev = cp[t];
}
cp.sort(boxCmp);
for (var t in cp)
{
var d = document.createElement('INPUT');
d.setAttribute("style", "position:absolute;top:"+cp[t].top+"pt;left:"+cp[t].left+
"pt;font:" + fntI[cp[t].font].size + "pt " + fntI[cp[t].font].family + ";width:" + cp[t].width + "pt;height:" + cp[t].height + "pt;");
elList.push([cp[t].left, cp[t].left + cp[t].width, d, cp[t].top, cp[t].top + cp[t].height, cp[t].data]);
if (uqIdx.x.indexOf(cp[t].left) < 0) uqIdx.x.push(cp[t].left);
if (uqIdx.y.indexOf(cp[t].top) < 0) uqIdx.y.push(cp[t].top);
d.value = cp[t].data;
document.body.appendChild(d);
}
elList[0][2].focus();
// document.write(document.body.innerHTML);
// document.close;
uqIdx.x.sort(num);
uqIdx.y.sort(num);
}
function boxCmp (a, b) {
if (Math.abs(a.top - b.top) <= 4) {
if (Math.abs(a.left - b.left) <= 2) return 0;
else return a.left - b.left;
} else return a.top - b.top;
}
var me;
function moveEl() {
var el = event.srcElement;
if(event.srcElement.tagName != 'INPUT') return;
if(event.key.indexOf("Page") == 0) return;
var i=0;
while (i<elList.length && elList[i][2] != el) i++;
me = elList[i], flw=[];
if(event.key.indexOf("Down") > -1) {
while (++i < elList.length) if (elList[i][3] > me[3]) flw.push(elList[i]);
flw.sort(distance);
} else
if(event.key.indexOf("Up") > -1) {
while (i--) if (elList[i][3] < me[3]) flw.push(elList[i]);
flw.sort(distance);
} else
if(event.key.indexOf("Left") > -1) {
while (i--) if (elList[i][0] < me[0] && me[3] <= elList[i][4]) flw.push(elList[i]); else break;
flw.sort(distanceX);
} else
if(event.key.indexOf("Right") > -1) {
while (++i < elList.length) if (elList[i][0] > me[0] && me[4] >= elList[i][3]) flw.push(elList[i]); else break;
flw.sort(distanceX);
} else return;
if (!flw.length) return;
flw[0][2].focus();
}
function distance(a,b) {
var ac0 = Math.abs(me[0] - a[0]) + Math.abs(me[3] - a[3]);
var ac1 = Math.abs(me[1] - a[1]) + Math.abs(me[4] - a[4]);
var bc0 = Math.abs(me[0] - b[0]) + Math.abs(me[3] - b[3]);
var bc1 = Math.abs(me[1] - b[1]) + Math.abs(me[4] - b[4]);
return Math.min(ac0,ac1) - Math.min(bc0,bc1);
}
function distanceX(a,b) {
var ac = Math.abs(me[0] - a[0]);
var bc = Math.abs(me[0] - b[0]);
return ac - bc;
}
function num(a,b) { return a-b; }
function XL() {
var xl = [];
for(var y=0;y<uqIdx.y.length;y++) {
var r = [];
for(var x=0;x<uqIdx.x.length;x++) r.push([]);
xl.push(r);
}
var x = uqIdx;
for(var i=0;i<elList.length;i++) {
var el = elList[i];
xl[uqIdx.y.indexOf(el[3])][uqIdx.x.indexOf(el[0])] = el[2].value;
}
var str = '';
for(var y=0;y<uqIdx.y.length;y++) {
str += xl[y].join('\t').replace(/\t+$/, '') + '\n';
}
var ta;
if (document.getElementsByTagName('TEXTAREA').length) ta = document.getElementsByTagName('TEXTAREA')[0];
else {
ta = document.createElement('TEXTAREA');
ta.setAttribute("style", "width:100%;height:100%;position:absolute");
ta.onblur = clear;
document.body.appendChild(ta);
}
ta.value = str;
ta.focus();
}
function loadData() {
//json = JSON.parse(document.frames[0].document.body.innerText);
show(json);
document.body.onkeydown = moveEl;
document.body.ondblclick = XL;
}
function clear(t) {
document.body.removeChild(event.srcElement);
}
var json = [{"number":1,"pages":12,"height":1188,"width":918,"fonts":[{"fontspec":"0","size":"14","family":"Times","color":"#000000"},{"fontspec":"1","size":"16","family":"Helvetica","color":"#000000"},{"fontspec":"2","size":"12","family":"Helvetica","color":"#000000"},{"fontspec":"3","size":"11","family":"Helvetica","color":"#000000"},{"fontspec":"4","size":"11","family":"Helvetica","color":"#000000"},{"fontspec":"5","size":"12","family":"Helvetica","color":"#000000"},{"fontspec":"6","size":"12","family":"Helvetica","color":"#000000"},{"fontspec":"7","size":"12","family":"Times","color":"#000000"}],"text":[{"top":1093,"left":455,"width":8,"height":21,"font":0,"data":"1"},{"top":127,"left":372,"width":68,"height":17,"font":1,"data":"Vaccine"},{"top":127,"left":445,"width":49,"height":17,"font":1,"data":"Injury"},{"top":127,"left":499,"width":47,"height":17,"font":1,"data":"Table"},{"top":186,"left":116,"width":49,"height":12,"font":3,"data":"Applies"},{"top":186,"left":169,"width":30,"height":12,"font":3,"data":"Only"},{"top":186,"left":203,"width":13,"height":12,"font":3,"data":"to"},{"top":186,"left":219,"width":57,"height":12,"font":3,"data":"Petitions"},{"top":186,"left":280,"width":18,"height":12,"font":3,"data":"for"},{"top":186,"left":302,"width":94,"height":12,"font":3,"data":"Compensation"},{"top":186,"left":400,"width":31,"height":12,"font":3,"data":"Filed"},{"top":186,"left":435,"width":37,"height":12,"font":3,"data":"under"},{"top":186,"left":477,"width":20,"height":12,"font":3,"data":"the"},{"top":186,"left":501,"width":53,"height":12,"font":3,"data":"National"},{"top":186,"left":558,"width":51,"height":12,"font":3,"data":"Vaccine"},{"top":186,"left":613,"width":37,"height":12,"font":3,"data":"Injury"},{"top":186,"left":654,"width":94,"height":12,"font":3,"data":"Compensation"},{"top":186,"left":751,"width":56,"height":12,"font":3,"data":"Program"},{"top":202,"left":116,"width":21,"height":12,"font":3,"data":"on "},{"top":202,"left":137,"width":14,"height":12,"font":3,"data":"or"},{"top":202,"left":154,"width":29,"height":12,"font":3,"data":"after"},{"top":202,"left":187,"width":40,"height":12,"font":3,"data":"March"},{"top":202,"left":231,"width":19,"height":12,"font":3,"data":"21,"},{"top":202,"left":254,"width":30,"height":12,"font":3,"data":"2017"},{"top":240,"left":108,"width":18,"height":14,"font":5,"data":"(a)"},{"top":240,"left":130,"width":17,"height":14,"font":5,"data":"In "},{"top":240,"left":147,"width":82,"height":14,"font":5,"data":"accordance "},{"top":240,"left":229,"width":31,"height":14,"font":5,"data":"with "},{"top":240,"left":260,"width":52,"height":14,"font":5,"data":"section "},{"top":240,"left":311,"width":43,"height":14,"font":5,"data":"312(b)"},{"top":240,"left":359,"width":12,"height":14,"font":5,"data":"of"},{"top":240,"left":375,"width":21,"height":14,"font":5,"data":"the"},{"top":240,"left":400,"width":55,"height":14,"font":5,"data":"National"},{"top":240,"left":460,"width":72,"height":14,"font":5,"data":"Childhood "},{"top":240,"left":531,"width":57,"height":14,"font":5,"data":"Vaccine "},{"top":240,"left":589,"width":37,"height":14,"font":5,"data":"Injury"},{"top":240,"left":630,"width":22,"height":14,"font":5,"data":"Act"},{"top":240,"left":656,"width":12,"height":14,"font":5,"data":"of"},{"top":240,"left":672,"width":37,"height":14,"font":5,"data":"1986,"},{"top":240,"left":714,"width":27,"height":14,"font":5,"data":"title "},{"top":240,"left":741,"width":12,"height":14,"font":5,"data":"III"},{"top":240,"left":758,"width":12,"height":14,"font":5,"data":"of"},{"top":257,"left":108,"width":41,"height":14,"font":5,"data":"Public"},{"top":257,"left":153,"width":28,"height":14,"font":5,"data":"Law"},{"top":257,"left":185,"width":51,"height":14,"font":5,"data":"99-660,"},{"top":257,"left":240,"width":29,"height":14,"font":5,"data":"100 "},{"top":257,"left":269,"width":31,"height":14,"font":5,"data":"Stat."},{"top":257,"left":304,"width":37,"height":14,"font":5,"data":"3779 "},{"top":257,"left":341,"width":22,"height":14,"font":5,"data":"(42"},{"top":257,"left":367,"width":44,"height":14,"font":5,"data":"U.S.C."},{"top":257,"left":416,"width":59,"height":14,"font":5,"data":"300aa-1 "},{"top":257,"left":475,"width":34,"height":14,"font":5,"data":"note)"},{"top":257,"left":513,"width":29,"height":14,"font":5,"data":"and "},{"top":257,"left":542,"width":48,"height":14,"font":5,"data":"section"},{"top":257,"left":594,"width":51,"height":14,"font":5,"data":"2114(c)"},{"top":257,"left":649,"width":13,"height":14,"font":5,"data":"of"},{"top":257,"left":666,"width":25,"height":14,"font":5,"data":"the "},{"top":257,"left":691,"width":41,"height":14,"font":5,"data":"Public"},{"top":257,"left":736,"width":43,"height":14,"font":5,"data":"Health"},{"top":274,"left":108,"width":54,"height":14,"font":5,"data":"Service "},{"top":274,"left":162,"width":26,"height":14,"font":5,"data":"Act,"},{"top":274,"left":192,"width":16,"height":14,"font":5,"data":"as"},{"top":274,"left":212,"width":62,"height":14,"font":5,"data":"amended"},{"top":274,"left":279,"width":36,"height":14,"font":5,"data":"(PHS"},{"top":274,"left":319,"width":27,"height":14,"font":5,"data":"Act)"},{"top":274,"left":350,"width":26,"height":14,"font":5,"data":"(42 "},{"top":274,"left":375,"width":44,"height":14,"font":5,"data":"U.S.C."},{"top":274,"left":424,"width":90,"height":14,"font":5,"data":"300aa-14(c)),"},{"top":274,"left":518,"width":25,"height":14,"font":5,"data":"the "},{"top":274,"left":543,"width":62,"height":14,"font":5,"data":"following "},{"top":274,"left":606,"width":11,"height":14,"font":5,"data":"is"},{"top":274,"left":621,"width":12,"height":14,"font":5,"data":"a "},{"top":274,"left":633,"width":37,"height":14,"font":5,"data":"table "},{"top":274,"left":670,"width":12,"height":14,"font":5,"data":"of"},{"top":274,"left":686,"width":62,"height":14,"font":5,"data":"vaccines,"},{"top":274,"left":753,"width":21,"height":14,"font":5,"data":"the"},{"top":292,"left":108,"width":51,"height":14,"font":5,"data":"injuries,"},{"top":292,"left":164,"width":73,"height":14,"font":5,"data":"disabilities,"},{"top":292,"left":241,"width":62,"height":14,"font":5,"data":"illnesses,"},{"top":292,"left":307,"width":72,"height":14,"font":5,"data":"conditions,"},{"top":292,"left":383,"width":29,"height":14,"font":5,"data":"and "},{"top":292,"left":412,"width":45,"height":14,"font":5,"data":"deaths"},{"top":292,"left":461,"width":61,"height":14,"font":5,"data":"resulting "},{"top":292,"left":522,"width":30,"height":14,"font":5,"data":"from"},{"top":292,"left":557,"width":25,"height":14,"font":5,"data":"the "},{"top":292,"left":581,"width":97,"height":14,"font":5,"data":"administration "},{"top":292,"left":679,"width":12,"height":14,"font":5,"data":"of"},{"top":292,"left":696,"width":36,"height":14,"font":5,"data":"such "},{"top":292,"left":731,"width":62,"height":14,"font":5,"data":"vaccines,"},{"top":309,"left":108,"width":29,"height":14,"font":5,"data":"and "},{"top":309,"left":137,"width":25,"height":14,"font":5,"data":"the "},{"top":309,"left":162,"width":33,"height":14,"font":5,"data":"time "},{"top":309,"left":195,"width":41,"height":14,"font":5,"data":"period"},{"top":309,"left":240,"width":12,"height":14,"font":5,"data":"in"},{"top":309,"left":256,"width":42,"height":14,"font":5,"data":"which "},{"top":309,"left":299,"width":21,"height":14,"font":5,"data":"the"},{"top":309,"left":324,"width":24,"height":14,"font":5,"data":"first"},{"top":309,"left":352,"width":61,"height":14,"font":5,"data":"symptom"},{"top":309,"left":417,"width":13,"height":14,"font":5,"data":"or"},{"top":309,"left":435,"width":93,"height":14,"font":5,"data":"manifestation "},{"top":309,"left":528,"width":12,"height":14,"font":5,"data":"of"},{"top":309,"left":545,"width":37,"height":14,"font":5,"data":"onset"},{"top":309,"left":585,"width":13,"height":14,"font":5,"data":"or"},{"top":309,"left":603,"width":12,"height":14,"font":5,"data":"of"},{"top":309,"left":620,"width":25,"height":14,"font":5,"data":"the "},{"top":309,"left":645,"width":67,"height":14,"font":5,"data":"significant"},{"top":309,"left":716,"width":82,"height":14,"font":5,"data":"aggravation "},{"top":326,"left":108,"width":12,"height":14,"font":5,"data":"of"},{"top":326,"left":125,"width":36,"height":14,"font":5,"data":"such "},{"top":326,"left":161,"width":51,"height":14,"font":5,"data":"injuries,"},{"top":326,"left":216,"width":73,"height":14,"font":5,"data":"disabilities,"},{"top":326,"left":294,"width":62,"height":14,"font":5,"data":"illnesses,"},{"top":326,"left":360,"width":72,"height":14,"font":5,"data":"conditions,"},{"top":326,"left":436,"width":29,"height":14,"font":5,"data":"and "},{"top":326,"left":465,"width":45,"height":14,"font":5,"data":"deaths"},{"top":326,"left":514,"width":11,"height":14,"font":5,"data":"is"},{"top":326,"left":529,"width":17,"height":14,"font":5,"data":"to "},{"top":326,"left":546,"width":37,"height":14,"font":5,"data":"occur"},{"top":326,"left":586,"width":30,"height":14,"font":5,"data":"after"},{"top":326,"left":621,"width":55,"height":14,"font":5,"data":"vaccine "},{"top":326,"left":676,"width":98,"height":14,"font":5,"data":"administration "},{"top":326,"left":773,"width":18,"height":14,"font":5,"data":"for"},{"top":343,"left":108,"width":61,"height":14,"font":5,"data":"purposes"},{"top":343,"left":174,"width":12,"height":14,"font":5,"data":"of"},{"top":343,"left":190,"width":64,"height":14,"font":5,"data":"receiving "},{"top":343,"left":254,"width":98,"height":14,"font":5,"data":"compensation "},{"top":343,"left":352,"width":38,"height":14,"font":5,"data":"under"},{"top":343,"left":395,"width":21,"height":14,"font":5,"data":"the"},{"top":343,"left":420,"width":62,"height":14,"font":5,"data":"Program."},{"top":343,"left":486,"width":74,"height":14,"font":5,"data":"Paragraph "},{"top":343,"left":560,"width":18,"height":14,"font":5,"data":"(b)"},{"top":343,"left":582,"width":12,"height":14,"font":5,"data":"of"},{"top":343,"left":599,"width":23,"height":14,"font":5,"data":"this"},{"top":343,"left":626,"width":52,"height":14,"font":5,"data":"section "},{"top":343,"left":678,"width":27,"height":14,"font":5,"data":"sets"},{"top":343,"left":710,"width":34,"height":14,"font":5,"data":"forth "},{"top":343,"left":744,"width":64,"height":14,"font":5,"data":"additional"},{"top":361,"left":108,"width":67,"height":14,"font":5,"data":"provisions"},{"top":361,"left":180,"width":25,"height":14,"font":5,"data":"that"},{"top":361,"left":209,"width":26,"height":14,"font":5,"data":"are "},{"top":361,"left":235,"width":21,"height":14,"font":5,"data":"not"},{"top":361,"left":260,"width":69,"height":14,"font":5,"data":"separately"},{"top":361,"left":333,"width":39,"height":14,"font":5,"data":"listed "},{"top":361,"left":372,"width":16,"height":14,"font":5,"data":"in "},{"top":361,"left":388,"width":23,"height":14,"font":5,"data":"this"},{"top":361,"left":415,"width":42,"height":14,"font":5,"data":"Table "},{"top":361,"left":457,"width":21,"height":14,"font":5,"data":"but"},{"top":361,"left":482,"width":25,"height":14,"font":5,"data":"that"},{"top":361,"left":511,"width":68,"height":14,"font":5,"data":"constitute "},{"top":361,"left":580,"width":26,"height":14,"font":5,"data":"part"},{"top":361,"left":610,"width":12,"height":14,"font":5,"data":"of"},{"top":361,"left":626,"width":12,"height":14,"font":5,"data":"it."},{"top":361,"left":642,"width":74,"height":14,"font":5,"data":"Paragraph "},{"top":361,"left":716,"width":18,"height":14,"font":5,"data":"(c)"},{"top":361,"left":738,"width":12,"height":14,"font":5,"data":"of"},{"top":361,"left":755,"width":23,"height":14,"font":5,"data":"this"},{"top":378,"left":108,"width":51,"height":14,"font":5,"data":"section "},{"top":378,"left":159,"width":27,"height":14,"font":5,"data":"sets"},{"top":378,"left":191,"width":34,"height":14,"font":5,"data":"forth "},{"top":378,"left":225,"width":25,"height":14,"font":5,"data":"the "},{"top":378,"left":250,"width":87,"height":14,"font":5,"data":"qualifications"},{"top":378,"left":341,"width":29,"height":14,"font":5,"data":"and "},{"top":378,"left":371,"width":27,"height":14,"font":5,"data":"aids"},{"top":378,"left":402,"width":17,"height":14,"font":5,"data":"to "},{"top":378,"left":419,"width":92,"height":14,"font":5,"data":"interpretation "},{"top":378,"left":511,"width":18,"height":14,"font":5,"data":"for"},{"top":378,"left":532,"width":25,"height":14,"font":5,"data":"the "},{"top":378,"left":557,"width":38,"height":14,"font":5,"data":"terms"},{"top":378,"left":599,"width":37,"height":14,"font":5,"data":"used "},{"top":378,"left":636,"width":16,"height":14,"font":5,"data":"in "},{"top":378,"left":652,"width":25,"height":14,"font":5,"data":"the "},{"top":378,"left":676,"width":42,"height":14,"font":5,"data":"Table."},{"top":378,"left":722,"width":71,"height":14,"font":5,"data":"Conditions"},{"top":395,"left":108,"width":29,"height":14,"font":5,"data":"and "},{"top":395,"left":137,"width":47,"height":14,"font":5,"data":"injuries"},{"top":395,"left":189,"width":25,"height":14,"font":5,"data":"that"},{"top":395,"left":218,"width":21,"height":14,"font":5,"data":"do "},{"top":395,"left":239,"width":21,"height":14,"font":5,"data":"not"},{"top":395,"left":264,"width":33,"height":14,"font":5,"data":"meet"},{"top":395,"left":301,"width":25,"height":14,"font":5,"data":"the "},{"top":395,"left":326,"width":38,"height":14,"font":5,"data":"terms"},{"top":395,"left":368,"width":12,"height":14,"font":5,"data":"of"},{"top":395,"left":385,"width":25,"height":14,"font":5,"data":"the "},{"top":395,"left":410,"width":86,"height":14,"font":5,"data":"qualifications"},{"top":395,"left":501,"width":29,"height":14,"font":5,"data":"and "},{"top":395,"left":530,"width":27,"height":14,"font":5,"data":"aids"},{"top":395,"left":561,"width":17,"height":14,"font":5,"data":"to "},{"top":395,"left":578,"width":91,"height":14,"font":5,"data":"interpretation "},{"top":395,"left":670,"width":26,"height":14,"font":5,"data":"are "},{"top":395,"left":696,"width":21,"height":14,"font":5,"data":"not"},{"top":395,"left":721,"width":42,"height":14,"font":5,"data":"within "},{"top":395,"left":763,"width":25,"height":14,"font":5,"data":"the "},{"top":413,"left":108,"width":42,"height":14,"font":5,"data":"Table."},{"top":413,"left":154,"width":74,"height":14,"font":5,"data":"Paragraph "},{"top":413,"left":228,"width":18,"height":14,"font":5,"data":"(d)"},{"top":413,"left":250,"width":12,"height":14,"font":5,"data":"of"},{"top":413,"left":267,"width":23,"height":14,"font":5,"data":"this"},{"top":413,"left":295,"width":51,"height":14,"font":5,"data":"section "},{"top":413,"left":346,"width":28,"height":14,"font":5,"data":"sets"},{"top":413,"left":378,"width":34,"height":14,"font":5,"data":"forth "},{"top":413,"left":412,"width":12,"height":14,"font":5,"data":"a "},{"top":413,"left":425,"width":56,"height":14,"font":5,"data":"glossary"},{"top":413,"left":485,"width":12,"height":14,"font":5,"data":"of"},{"top":413,"left":501,"width":38,"height":14,"font":5,"data":"terms"},{"top":413,"left":543,"width":37,"height":14,"font":5,"data":"used "},{"top":413,"left":580,"width":16,"height":14,"font":5,"data":"in "},{"top":413,"left":595,"width":73,"height":14,"font":5,"data":"paragraph "},{"top":413,"left":668,"width":22,"height":14,"font":5,"data":"(c)."},{"top":551,"left":226,"width":57,"height":14,"font":2,"data":"Vaccine"},{"top":533,"left":408,"width":55,"height":14,"font":2,"data":"Illness, "},{"top":533,"left":463,"width":73,"height":14,"font":2,"data":"disability, "},{"top":533,"left":536,"width":41,"height":14,"font":2,"data":"injury"},{"top":551,"left":418,"width":15,"height":14,"font":2,"data":"or"},{"top":551,"left":437,"width":67,"height":14,"font":2,"data":"condition"},{"top":551,"left":509,"width":57,"height":14,"font":2,"data":"covered"},{"top":499,"left":591,"width":35,"height":14,"font":2,"data":"Time"},{"top":499,"left":631,"width":46,"height":14,"font":2,"data":"period"},{"top":499,"left":680,"width":20,"height":14,"font":2,"data":"for"},{"top":499,"left":705,"width":32,"height":14,"font":2,"data":"first "},{"top":499,"left":737,"width":67,"height":14,"font":2,"data":"symptom"},{"top":516,"left":589,"width":15,"height":14,"font":2,"data":"or"},{"top":516,"left":608,"width":97,"height":14,"font":2,"data":"manifestation"},{"top":516,"left":710,"width":18,"height":14,"font":2,"data":"of "},{"top":516,"left":728,"width":44,"height":14,"font":2,"data":"onset "},{"top":516,"left":773,"width":15,"height":14,"font":2,"data":"or"},{"top":516,"left":792,"width":18,"height":14,"font":2,"data":"of "},{"top":533,"left":597,"width":79,"height":14,"font":2,"data":"significant "},{"top":533,"left":676,"width":85,"height":14,"font":2,"data":"aggravation"},{"top":533,"left":765,"width":33,"height":14,"font":2,"data":"after"},{"top":551,"left":616,"width":59,"height":14,"font":2,"data":"vaccine "},{"top":551,"left":675,"width":103,"height":14,"font":2,"data":"administration"},{"top":579,"left":110,"width":8,"height":14,"font":5,"data":"I."},{"top":579,"left":122,"width":61,"height":14,"font":5,"data":"Vaccines"},{"top":579,"left":187,"width":73,"height":14,"font":5,"data":"containing "},{"top":579,"left":260,"width":49,"height":14,"font":5,"data":"tetanus"},{"top":579,"left":313,"width":44,"height":14,"font":5,"data":"toxoid "},{"top":579,"left":357,"width":34,"height":14,"font":5,"data":"(e.g.,"},{"top":596,"left":110,"width":42,"height":14,"font":5,"data":"DTaP,"},{"top":596,"left":156,"width":34,"height":14,"font":5,"data":"DTP,"},{"top":596,"left":195,"width":24,"height":14,"font":5,"data":"DT,"},{"top":596,"left":223,"width":22,"height":14,"font":5,"data":"Td,"},{"top":596,"left":249,"width":13,"height":14,"font":5,"data":"or"},{"top":596,"left":266,"width":24,"height":14,"font":5,"data":"TT)"},{"top":579,"left":400,"width":14,"height":14,"font":5,"data":"A."},{"top":579,"left":418,"width":81,"height":14,"font":5,"data":"Anaphylaxis"},{"top":596,"left":400,"width":14,"height":14,"font":5,"data":"B."},{"top":596,"left":418,"width":58,"height":14,"font":5,"data":"Brachial "},{"top":596,"left":476,"width":51,"height":14,"font":5,"data":"Neuritis"},{"top":575,"left":587,"width":21,"height":20,"font":7,"data":"?4 "},{"top":575,"left":607,"width":42,"height":20,"font":7,"data":"hours."},{"top":596,"left":587,"width":34,"height":14,"font":5,"data":"2-28 "},{"top":596,"left":621,"width":32,"height":14,"font":5,"data":"days"},{"top":596,"left":656,"width":26,"height":14,"font":5,"data":"(not"},{"top":596,"left":686,"width":27,"height":14,"font":5,"data":"less"},{"top":596,"left":717,"width":29,"height":14,"font":5,"data":"than"},{"top":596,"left":751,"width":12,"height":14,"font":5,"data":"2 "},{"top":596,"left":763,"width":31,"height":14,"font":5,"data":"days"},{"top":614,"left":587,"width":29,"height":14,"font":5,"data":"and "},{"top":614,"left":616,"width":21,"height":14,"font":5,"data":"not"},{"top":614,"left":641,"width":39,"height":14,"font":5,"data":"more "},{"top":614,"left":679,"width":29,"height":14,"font":5,"data":"than"},{"top":614,"left":712,"width":21,"height":14,"font":5,"data":"28 "},{"top":614,"left":733,"width":41,"height":14,"font":5,"data":"days)."},{"top":642,"left":400,"width":15,"height":14,"font":5,"data":"C."},{"top":642,"left":419,"width":60,"height":14,"font":5,"data":"Shoulder"},{"top":642,"left":483,"width":37,"height":14,"font":5,"data":"Injury"},{"top":642,"left":524,"width":52,"height":14,"font":5,"data":"Related"},{"top":659,"left":400,"width":17,"height":14,"font":5,"data":"to "},{"top":659,"left":417,"width":57,"height":14,"font":5,"data":"Vaccine "},{"top":659,"left":474,"width":95,"height":14,"font":5,"data":"Administration"},{"top":637,"left":587,"width":29,"height":20,"font":7,"data":"?48 "},{"top":637,"left":616,"width":42,"height":20,"font":7,"data":"hours."},{"top":687,"left":400,"width":15,"height":14,"font":5,"data":"D."},{"top":687,"left":419,"width":70,"height":14,"font":5,"data":"Vasovagal"},{"top":687,"left":493,"width":56,"height":14,"font":5,"data":"syncope"},{"top":683,"left":587,"width":21,"height":20,"font":7,"data":"?1 "},{"top":683,"left":607,"width":34,"height":20,"font":7,"data":"hour."},{"top":716,"left":110,"width":12,"height":14,"font":5,"data":"II."},{"top":716,"left":126,"width":61,"height":14,"font":5,"data":"Vaccines"},{"top":716,"left":191,"width":73,"height":14,"font":5,"data":"containing "},{"top":716,"left":264,"width":43,"height":14,"font":5,"data":"whole "},{"top":716,"left":307,"width":23,"height":14,"font":5,"data":"cell"},{"top":716,"left":334,"width":60,"height":14,"font":5,"data":"pertussis"},{"top":733,"left":110,"width":57,"height":14,"font":5,"data":"bacteria,"},{"top":733,"left":171,"width":66,"height":14,"font":5,"data":"extracted "},{"top":733,"left":237,"width":13,"height":14,"font":5,"data":"or"},{"top":733,"left":254,"width":41,"height":14,"font":5,"data":"partial"},{"top":733,"left":299,"width":23,"height":14,"font":5,"data":"cell"},{"top":733,"left":326,"width":60,"height":14,"font":5,"data":"pertussis"},{"top":750,"left":110,"width":57,"height":14,"font":5,"data":"bacteria,"},{"top":750,"left":171,"width":13,"height":14,"font":5,"data":"or"},{"top":750,"left":189,"width":50,"height":14,"font":5,"data":"specific"},{"top":750,"left":243,"width":60,"height":14,"font":5,"data":"pertussis"},{"top":750,"left":307,"width":67,"height":14,"font":5,"data":"antigen(s)"},{"top":767,"left":110,"width":34,"height":14,"font":5,"data":"(e.g.,"},{"top":767,"left":148,"width":34,"height":14,"font":5,"data":"DTP,"},{"top":767,"left":186,"width":42,"height":14,"font":5,"data":"DTaP,"},{"top":767,"left":233,"width":14,"height":14,"font":5,"data":"P,"},{"top":767,"left":251,"width":63,"height":14,"font":5,"data":"DTP-Hib)"},{"top":716,"left":400,"width":14,"height":14,"font":5,"data":"A."},{"top":716,"left":418,"width":81,"height":14,"font":5,"data":"Anaphylaxis"},{"top":711,"left":587,"width":21,"height":20,"font":7,"data":"?4 "},{"top":711,"left":607,"width":42,"height":20,"font":7,"data":"hours."},{"top":796,"left":400,"width":14,"height":14,"font":5,"data":"B."},{"top":796,"left":418,"width":108,"height":14,"font":5,"data":"Encephalopathy"},{"top":796,"left":530,"width":13,"height":14,"font":5,"data":"or"},{"top":813,"left":400,"width":79,"height":14,"font":5,"data":"encephalitis"},{"top":791,"left":587,"width":29,"height":20,"font":7,"data":"?72 "},{"top":791,"left":616,"width":42,"height":20,"font":7,"data":"hours."},{"top":841,"left":400,"width":15,"height":14,"font":5,"data":"C."},{"top":841,"left":419,"width":60,"height":14,"font":5,"data":"Shoulder"},{"top":841,"left":483,"width":37,"height":14,"font":5,"data":"Injury"},{"top":841,"left":524,"width":56,"height":14,"font":5,"data":"Related "},{"top":858,"left":400,"width":17,"height":14,"font":5,"data":"to "},{"top":858,"left":417,"width":57,"height":14,"font":5,"data":"Vaccine "},{"top":858,"left":474,"width":95,"height":14,"font":5,"data":"Administration"},{"top":837,"left":587,"width":29,"height":20,"font":7,"data":"?48 "},{"top":837,"left":616,"width":42,"height":20,"font":7,"data":"hours."},{"top":887,"left":400,"width":15,"height":14,"font":5,"data":"D."},{"top":887,"left":419,"width":70,"height":14,"font":5,"data":"Vasovagal"},{"top":887,"left":493,"width":56,"height":14,"font":5,"data":"syncope"},{"top":882,"left":587,"width":21,"height":20,"font":7,"data":"?1 "},{"top":882,"left":607,"width":34,"height":20,"font":7,"data":"hour."},{"top":915,"left":110,"width":17,"height":14,"font":5,"data":"III."},{"top":915,"left":130,"width":61,"height":14,"font":5,"data":"Vaccines"},{"top":915,"left":195,"width":73,"height":14,"font":5,"data":"containing "},{"top":915,"left":268,"width":60,"height":14,"font":5,"data":"measles,"},{"top":915,"left":332,"width":54,"height":14,"font":5,"data":"mumps,"}]}]
input {
padding:0px;
border: 1px solid #e0e0e0;
}
input:focus {
background-color: red;
}
<body onload="loadData()" style="width:800px;height:400px">
</body>
After any input get focus arrows navigate to nearest input in particular direction and double click shows textarea with Tab separated content which could be coppied to XL directly.
Part of PDF www.hrsa.gov vaccineinjurytable.pdf used as demo PDF JSON export.
Got problems with some PDFs and found node.js way.
Setup pre-requisties pdf2json#1.2.0: npm i -g pdf2json
Convert and show PDF:
node pdf2htm.js inputFile.pdf
Here node.js (converts PDF 2 JSON, merge with input, save as htm of same name and opens result in default browser):
const fileta = process.argv[2];
const fs = require('fs');
const path = require('path');
const dest = fileta.split('.')[0].replace(/\s+/g, '_') + '.htm';
var exec = require('child_process').exec;
const PDFParser = require(path.join(process.env.APPDATA, 'npm/node_modules', 'pdf2json'));
const pdfParser = new PDFParser();
pdfParser.on('pdfParser_dataError', errData => console.error(errData.parserError));
pdfParser.on('pdfParser_dataReady', pdfData => {
var src = fs.readFileSync(path.join(__dirname, 'showPDF.htm')).toString().split('€');
fs.writeFileSync(path.join(__dirname, dest),
src[0] + 'var json=' +
unescape(decodeURI(JSON.stringify(pdfData))) +
'.formImage.Pages;' +
src[1]
);
console.log(dest);
exec(path.join(__dirname, dest), (a, b, c) => {});
});
pdfParser.loadPDF(path.join(__dirname, fileta));
And here json presenting file
<html><head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
<style>input{padding:0px;border:1px solid #e0e0e0;}input:focus{background-color:red;}</style>
<script type="text/javascript">
var elList = [];
var uqIdx = {x:[],y:[]}, json;
€
function show(json) {
if (json === undefined) {
var s = document.createElement("SCRIPT");
s.src = location.hash.substr(1);
document.body.appendChild(s);
return;
}
var txt = json[0].Texts;
var cp = [];
for (var t in txt) {
if (txt[t].R[0].T !== ' ') {
cp.push(txt[t]);
}
}
var prev, mX = 27, mY = 27;
for (var t in cp) {
cp[t].x *= mX;
cp[t].y *= mY;
if (prev) {
if (Math.abs(prev.y - cp[t].y) < prev.R[0].TS[1] / 2) {
var x = prev.x + prev.w; var x2 = cp[t].x;
if (x < x2 && x2 - x > 5 || x2 - x < -5) {
if ((x2 - x) / 2 > prev.R[0].T.length / 10 || prev.R[0].T.substr(-1) !== ' ') {
prev = cp[t];
continue;
}
}
prev.R[0].T += cp[t].R[0].T;
prev.w += cp[t].w;
delete cp[t];
continue;
}
}
prev = cp[t];
}
cp.sort(boxCmp);
for (var t in cp)
{
var d = document.createElement('INPUT');
d.setAttribute("style", "position:absolute;top:"+cp[t].y+";left:"+cp[t].x);
d.value = unescape(decodeURI(cp[t].R[0].T));
elList.push([cp[t].x, cp[t].x + d.value.length, d, cp[t].y, cp[t].y + 10, cp[t].R[0].T]);
if (uqIdx.x.indexOf(cp[t].x) < 0) uqIdx.x.push(cp[t].x);
if (uqIdx.y.indexOf(cp[t].y) < 0) uqIdx.y.push(cp[t].y);
document.body.appendChild(d);
}
elList[0][2].focus();
uqIdx.x.sort(num);
uqIdx.y.sort(num);
}
function boxCmp (a, b) {
if (Math.abs(a.y - b.y) <= 4) {
if (Math.abs(a.x - b.x) <= 2) return 0;
else return a.x - b.x;
} else return a.y - b.y;
}
var me;
function moveEl() {
var el = event.srcElement;
if(event.srcElement.tagName != 'INPUT') return;
if(event.key.indexOf("Page") == 0) return;
var i=0;
while (i<elList.length && elList[i][2] != el) i++;
me = elList[i], flw=[];
if(event.key.indexOf("Down") > -1) {
while (++i < elList.length) if (elList[i][3] > me[3]) flw.push(elList[i]);
flw.sort(distance);
} else
if(event.key.indexOf("Up") > -1) {
while (i--) if (elList[i][3] < me[3]) flw.push(elList[i]);
flw.sort(distance);
} else
if(event.key.indexOf("Left") > -1) {
while (i--) if (elList[i][0] < me[0] && me[3] <= elList[i][4]) flw.push(elList[i]); else break;
flw.sort(distanceX);
} else
if(event.key.indexOf("Right") > -1) {
while (++i < elList.length) if (elList[i][0] > me[0] && me[4] >= elList[i][3]) flw.push(elList[i]); else break;
flw.sort(distanceX);
} else return;
if (!flw.length) return;
flw[0][2].focus();
}
function distance(a,b) {
var ac0 = Math.abs(me[0] - a[0]) + Math.abs(me[3] - a[3]);
var ac1 = Math.abs(me[1] - a[1]) + Math.abs(me[4] - a[4]);
var bc0 = Math.abs(me[0] - b[0]) + Math.abs(me[3] - b[3]);
var bc1 = Math.abs(me[1] - b[1]) + Math.abs(me[4] - b[4]);
return Math.min(ac0,ac1) - Math.min(bc0,bc1);
}
function distanceX(a,b) {
var ac = Math.abs(me[0] - a[0]);
var bc = Math.abs(me[0] - b[0]);
return ac - bc;
}
function num(a,b) { return a-b; }
function XL() {
var xl = [];
for(var y=0;y<uqIdx.y.length;y++) {
var r = [];
for(var x=0;x<uqIdx.x.length;x++) r.push([]);
xl.push(r);
}
var x = uqIdx;
for(var i=0;i<elList.length;i++) {
var el = elList[i];
xl[uqIdx.y.indexOf(el[3])][uqIdx.x.indexOf(el[0])] = el[2].value;
}
var str = '';
for(var y=0;y<uqIdx.y.length;y++) {
str += xl[y].join('\t').replace(/\t+$/, '') + '\n';
}
var ta;
if (document.getElementsByTagName('TEXTAREA').length) ta = document.getElementsByTagName('TEXTAREA')[0];
else {
ta = document.createElement('TEXTAREA');
ta.setAttribute("style", "width:100%;height:100%;position:absolute");
ta.onblur = clear;
document.body.appendChild(ta);
}
ta.value = str;
ta.focus();
}
function clear(t) {
document.body.removeChild(event.srcElement);
}
</script>
</head>
<body onload="show(json);" ondblclick="XL()" onkeydown="moveEl()"></body>
</html>
Arrows moves to next nearest element in particular direction, double click shows textarea containing tab delimitted data for excel.