How to skip end of line comments with Astropy ascii table reader? - astropy

I want to read an ASCII file that contains end-of-line comments into an Astropy table. Something like this:
a | b
1 | 2
3 | 4 # this is a comment
# another comment
5 | 6
As shown here, this doesn't do what I want:
Table.read(filename, format='ascii.csv', delimiter='|', comment='\s*#')
What options do I have to pass to Table.read to make this work?

There is no built-in way to do this, but it should take only a few lines of custom code. The basic idea is to make a subclass of the CSV reader and override its data_class = CsvData attribute with your own data class, itself a subclass of CsvData. In that data subclass, override the process_lines method to do your custom comment-line handling. Look at the BaseData class in core.py to see the baseline process_lines behavior.
from astropy.io.ascii.basic import Csv, CsvData

class CsvCommentData(CsvData):
    comment = '#'  # regular expression not allowed for this reader

    def _strip_trailing_comments(self, lines):
        for line in lines:
            index = line.rfind(self.comment)
            if index >= 0:
                line = line[:index]
            yield line

    def process_lines(self, lines):
        return super(CsvCommentData, self).process_lines(self._strip_trailing_comments(lines))

class CsvComment(Csv):
    _format_name = 'csv_comment'
    _io_registry_can_write = True
    _description = 'Comma-separated-values with trailing comments allowed'
    data_class = CsvCommentData
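With the subclass defined, a minimal usage sketch (not tested against every astropy version; passing the reader class directly to ascii.read avoids relying on format-name registration, and 'data.txt' is a placeholder filename):

from astropy.io import ascii

# Sketch: read the pipe-delimited sample above with the custom reader,
# stripping anything after '#' from each data line.
table = ascii.read('data.txt', Reader=CsvComment, delimiter='|')
print(table)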

How to extract the [Documentation] text from Robot framework test case

I am trying to extract the content of the [Documentation] section as a string for comparison with other parts in a Python script.
I was told to use the Robot Framework API https://robot-framework.readthedocs.io/en/stable/
to extract it, but I have no idea how.
However, I am required to work with version 3.1.2.
Example:
*** Test Cases ***
ATC Verify that Sensor Battery can enable and disable manufacturing mode
    [Documentation]    E1: This is the description of the test 1
    ...                E2: This is the description of the test 2
    [Tags]    E1    TRACE{Trace_of_E1}
    ...       E2    TRACE{Trace_of_E2}
Extract the string as
E1: This is the description of the test 1
E2: This is the description of the test 2
Have a look at these examples. I did something similar to generate test plan descriptions. I tried to adapt my code to your requirements, and this could maybe work for you.
import os
import re
from robot.api.parsing import (
    get_model, get_tokens, Documentation, EmptyLine, KeywordCall,
    ModelVisitor, Token
)

class RobotParser(ModelVisitor):
    def __init__(self):
        # Attribute to store the collected documentation text
        self.text = ''

    def get_text(self):
        return self.text

    def visit_TestCase(self, node):
        # The matched `TestCase` node is a block with `header` and
        # `body` attributes. `header` is a statement with familiar
        # `get_token` and `get_value` methods for getting certain
        # tokens or their value.
        for keyword in node.body:
            # skip statements without a [Documentation] token
            if keyword.get_value(Token.DOCUMENTATION) is None:
                continue
            self.text += keyword.get_value(Token.ARGUMENT)

    def visit_Documentation(self, node):
        # The matched "Documentation" node with its value
        self.text += node.value + '\n'

    def visit_File(self, node):
        # Call `generic_visit` to visit child nodes as well.
        return self.generic_visit(node)

if __name__ == "__main__":
    path = "../tests"
    for filename in os.listdir(path):
        if re.match(r".*\.robot", filename):
            model = get_model(os.path.join(path, filename))
            robot_parser = RobotParser()
            robot_parser.visit(model)
            text = robot_parser.get_text()
The code marked as best answer didn't quite work for me and has a lot of redundancy, but it inspired me enough to get into the parsing and write it in a much more readable and efficient way that actually works as is. You just have to have your own way of generating and iterating through the filesystem, calling the get_robot_metadata(filepath) function on each file.
from robot.api.parsing import (get_model, ModelVisitor, Token)

class RobotParser(ModelVisitor):
    def __init__(self):
        self.testcases = {}

    def visit_TestCase(self, node):
        testcasename = node.header.name
        self.testcases[testcasename] = {}
        for section in node.body:
            if section.get_value(Token.DOCUMENTATION) is not None:
                documentation = section.value
                self.testcases[testcasename]['Documentation'] = documentation
            elif section.get_value(Token.TAGS) is not None:
                tags = section.values
                self.testcases[testcasename]['Tags'] = tags

    def get_testcases(self):
        return self.testcases

def get_robot_metadata(filepath):
    if filepath.endswith('.robot'):
        robot_parser = RobotParser()
        model = get_model(filepath)
        robot_parser.visit(model)
        metadata = robot_parser.get_testcases()
        return metadata
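A hypothetical usage sketch, assuming the suites live in a ../tests directory (the path is only an example):

import os

# Sketch: walk a directory of .robot files and print each test case's
# [Documentation] value collected by get_robot_metadata().
path = "../tests"
for filename in os.listdir(path):
    metadata = get_robot_metadata(os.path.join(path, filename))
    if metadata:
        for testcase, fields in metadata.items():
            print(testcase, fields.get('Documentation'))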
This function will be able to extract the [Documentation] section from the testcase:
def documentation_extractor(testcase):
    documentation = []
    for setting in testcase.settings:
        if len(setting) > 2 and setting[1].lower() == "[documentation]":
            for doc in setting[2:]:
                if doc.startswith("#"):
                    # the start of a comment, so skip the rest of the line
                    break
                documentation.append(doc)
            break
    return "\n".join(documentation)

NiFi: Remove fixed number of header lines from file

I'm processing a file and I'd like to remove (trim) the first X header lines to keep only data, ideally without using regular expressions.
Thanks
You can remove the first X header lines by using the ExecuteScript processor in NiFi.
The following is an example Jython script which I wrote for myself:
import json
import java.io
from org.apache.commons.io import IOUtils
from java.nio.charset import StandardCharsets
from org.apache.nifi.processor.io import StreamCallback

class PyStreamCallback(StreamCallback):
    def __init__(self):
        pass

    def process(self, inputStream, outputStream):
        text = IOUtils.readLines(inputStream, StandardCharsets.UTF_8)
        for line in text[3:]:
            outputStream.write(line + "\n")

flowFile = session.get()
if flowFile is not None:
    flowFile = session.write(flowFile, PyStreamCallback())
    flowFile = session.putAttribute(flowFile, "filename", flowFile.getAttribute('filename').split('.')[0] + '_translated.json')
    session.transfer(flowFile, REL_SUCCESS)
This obviously removes the first 3 lines but you can easily modify it to remove more or less lines.
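If the number of header lines varies per file, one option (a sketch only; the 'header.count' attribute name is made up here) is to read the count from a flow file attribute instead of hard-coding it:

# Sketch: pass the number of header lines into the callback.
class SkipHeaderCallback(StreamCallback):
    def __init__(self, skip):
        self.skip = skip

    def process(self, inputStream, outputStream):
        text = IOUtils.readLines(inputStream, StandardCharsets.UTF_8)
        for line in text[self.skip:]:
            outputStream.write(line + "\n")

flowFile = session.get()
if flowFile is not None:
    # 'header.count' is a hypothetical attribute set by an upstream processor.
    skip = int(flowFile.getAttribute('header.count') or 3)
    flowFile = session.write(flowFile, SkipHeaderCallback(skip))
    session.transfer(flowFile, REL_SUCCESS)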
Hope that helps.

tkinter variable for drop down selection empty

I tried to program an app in tkinter that would load random lines from a file you select from a pull down menu and display the selected line in a text window.
It seems like the variable "var" in insert_txt does not return the selected "option" but rather an empty string, resulting in the following error:
"File not found error" (FileNotFoundError: [Errno 2] No such file or
directory: '').
Please help!
#!/usr/bin/env python
# Python 3
import tkinter
from tkinter import ttk
import random

class Application:
    def __init__(self, root):
        self.root = root
        self.root.title('Random Stuff')
        ttk.Frame(self.root, width=450, height=185).pack()
        self.init_widgets()
        var = tkinter.StringVar(root)
        script = var.get()
        choices = ['option1', 'option2', 'option3']
        option = tkinter.OptionMenu(root, var, *choices)
        option.pack(side='right', padx=10, pady=10)

    def init_widgets(self):
        ttk.Button(self.root, command=self.insert_txt, text='Button', width='10').place(x=10, y=10)
        self.txt = tkinter.Text(self.root, width='45', height='5')
        self.txt.place(x=10, y=50)

    def insert_txt(self):
        var = tkinter.StringVar(root)
        name = var.get()
        line = random.choice(open(str(name)).readlines())
        self.txt.insert(tkinter.INSERT, line)

if __name__ == '__main__':
    root = tkinter.Tk()
    Application(root)
    root.mainloop()
That's because you're just creating an empty StringVar that isn't modified later, thus returning an empty string.
The OptionMenu takes a command parameter that calls the specified method every time another option is selected. You can call a method like this, replacing your insert_txt:
def __init__(self, root):
    # ...
    self.var = tkinter.StringVar()
    self.options = tkinter.OptionMenu(root, self.var, *choices, command=self.option_selected)
    # ...

def option_selected(self, event):
    name = self.var.get()
    # The stuff you already had
Additionally, you have to empty the Text widget before inserting, otherwise the previous text stays. I think an Entry widget might be better suited here, too.
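For completeness, here is a minimal sketch of how the pieces could fit back into the question's Application class (assuming the selected option is the name of the file to read; the Button is dropped because picking an option now triggers the update):

#!/usr/bin/env python
# Python 3 -- minimal sketch, not a drop-in replacement
import tkinter
from tkinter import ttk
import random

class Application:
    def __init__(self, root):
        self.root = root
        self.root.title('Random Stuff')
        ttk.Frame(self.root, width=450, height=185).pack()
        self.init_widgets()
        # Keep the StringVar on self so other methods can read it later.
        self.var = tkinter.StringVar(self.root)
        choices = ['option1', 'option2', 'option3']
        option = tkinter.OptionMenu(self.root, self.var, *choices,
                                    command=self.option_selected)
        option.pack(side='right', padx=10, pady=10)

    def init_widgets(self):
        self.txt = tkinter.Text(self.root, width='45', height='5')
        self.txt.place(x=10, y=50)

    def option_selected(self, event):
        # The variable now holds the option that was just selected.
        name = self.var.get()
        line = random.choice(open(name).readlines())
        self.txt.delete('1.0', tkinter.END)  # clear the previous text
        self.txt.insert(tkinter.INSERT, line)

if __name__ == '__main__':
    root = tkinter.Tk()
    Application(root)
    root.mainloop()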

saving and deleting file I/O

Two part question.
First, how can I change this (I've tried using 'for' but I can't figure it out) so that it saves like 'key value' instead of '{key: value}'?
with open("phonebook.txt", "w") as x:
    json.dump(a, x)
Second, how do you delete from a file using the user's input?
I cannot see a way of changing this to delete from the file instead of the dict 'a':
name = input("enter name of contact you want to delete: ")
if name in a:
    del a[name]
EDIT: This is what I've done now, but it doesn't do what's expected (I also tried adding .readlines() where x is, but it just gives errors).
def save(a):
    with open("phonebook.txt", "w") as x:
        for k in a:
            json.dump(str(k) + " " + str(a[k]), x)

def load():
    a = {}
    with open("phonebook.txt", "r") as f:
        for l in f:
            a[l[0]] = l[1]
    print(a)
def save works fine (as far as I can see anyway).
Also, I have tried c = l.split() and a[c[0]] = c[1]. It just doesn't want to work!
First part
That's not JSON format. Do not use JSON if you need something else. Use a plain text file, like:
with open("phonebook.txt", "w") as file:
    for key, value in a.items():
        file.write(str(key) + " " + str(value) + "\n")
Second part
It looks like you loaded the file into dictionary a. In that case, you just need to write dictionary a back to the file after deleting (see the sketch after the code below). If you have not loaded the file into the dictionary yet, you can do it with:
a = {}
with open("phonebook.txt") as file:
    for line in file.readlines():
        content = line.split()
        a[content[0]] = content[1]
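Putting the two parts together, a minimal sketch of deleting by user input and writing the updated dictionary back in the 'key value' format:

# Sketch: remove the chosen contact from the dict, then rewrite the file.
name = input("enter name of contact you want to delete: ")
if name in a:
    del a[name]

with open("phonebook.txt", "w") as file:
    for key, value in a.items():
        file.write(str(key) + " " + str(value) + "\n")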

How parsing works

I am trying the sample code for the piracy report.
The line of code:
for incident in soup('td', width="90%"):
searches the soup for an element td with the attribute width="90%", correct? It invokes the __init__ method of the BeautifulStoneSoup class, which eventually invokes SGMLParser.__init__(self)
Am I correct with the class flow above?
The soup looks like this in the report now:
<td class="fabrik_row___jos_fabrik_icc-ccs-piracymap2010___narrations" ><p>22.09.2010: 0236 UTC: Posn: 03:49.9N – 006:54.6E: Off Bonny River: Nigeria.<p/>
<p>About 21 armed pirates in three crafts boarded a pipe layer crane vessel undertow. All crew locked themselves in accommodations. Pirates were able to take one crewmember as hostage. Master called Nigerian naval vessel in vicinity. Later pirates released the crew and left the vessel. All crew safe.<p/></td>
There is no width markup in the text. I changed the line of code that is searching:
for incident in soup('td', class="fabrik_row___jos_fabrik_icc-ccs-piracymap2010___narrations"):
It appears that class is a reserved word, maybe?
How do I get the current example code to run, and has more changed in the application than just the HTML output?
The URL I am using:
urllib2.urlopen("http://www.icc-ccs.org/index.php?option=com_fabrik&view=table&tableid=534&calculations=0&Itemid=82")
There must be a better way....
import urllib2
from BeautifulSoup import BeautifulSoup

page = urllib2.urlopen("http://www.icc-ccs.org/index.php?option=com_fabrik&view=table&tableid=534&calculations=0&Itemid=82")
soup = BeautifulSoup(page)
soup.find("table", {"class": "fabrikTable"})
list1 = soup.table.findAll('p', limit=50)
i = 0
imax = 0
for item in list1:
    imax = imax + 1
while i < imax:
    Itime = list1[i]
    i = i + 2
    Incident = list1[i]
    i = i + 1
    Inext = list1[i]
    print "Time ", Itime
    print "Incident", Incident
    print " "
    i = i + 1
class is a reserved word and will not work with that method.
This method works but does not return the list:
soup.find("tr", { "class" : "fabrik_row___jos_fabrik_icc-ccs-piracymap2010___narrations" })
And I confirmed the class flow for the parse.
The example will run, but the HTML must be parsed with different methods because the width='90%' is no longer in the HTML.
Still working on the proper methods; will post back when I get it working.