I'm new to Python. I'm modelling zirconia nanotubes. I have generated the structure in PDB format and want to export it to ANSYS. The code below was written in Python:
# Turn every ATOM line of a PDB file into an "N, <node>, <c6>, <c7>, <c8>"
# line and append those lines to a text file that starts with /PREP7.
node_value = 0  # incremented before use, so the first node written is 1

# Read the whole input first: if the PDB file is missing, the script stops
# here and the output file is left untouched.
with open('C:\\xyz\\test.pdb', 'r') as file:
    igot = file.readlines()

# Append mode is kept from the original script, so re-running adds to the
# existing nodes.txt instead of replacing it.
with open('C:\\xyz\\nodes.txt', "a") as output_file:  # needs input for distinct file
    output_file.write('/PREP7' + "\n")  # open pre-processing menu
    for line in igot:
        if "ATOM" in line:
            node_value = node_value + 1
            nv = str(node_value)
            xyz = line.split()
            # Whitespace-separated columns 6, 7 and 8 are written after the
            # node number (presumably the x, y, z coordinates - verify
            # against the PDB layout in use).
            line_i = ', '.join(["N", nv, xyz[6], xyz[7], xyz[8]]) + "\n"
            print(line_i)
            output_file.write(line_i)
I got an error message:File "c:\xyz\test.py", line 2, in
file = open('C:\xyz\test.pdb', 'r')
builtins.FileNotFoundError: [Errno 2] No such file or directory: 'C:\xyz\test.pdb'
How do I solve the problem?
Bold guess: There is no file 'C:\xyz\test.pdb'
Related
I have an Excel file containing special characters. I want to write the DataFrame to a text file without double quotes, but I get an error. Any help is much appreciated.
To generate operation commands from excel to text format
from pandas import DataFrame
import pandas as pd
# Read the workbook without a header row, join columns 0, 1 and 2 of every
# row with single spaces, and write the result to a text file.
filename = r'In_file.xlsx'
df = pd.read_excel(filename, header=None)
df1 = df[0] + ' ' + df[1] + ' ' + df[2]
# NOTE(review): csv.QUOTE_NONE is referenced here, but no `import csv`
# appears in this snippet.
df1.to_csv('out_file3.txt', index=False, header=False, quoting=csv.QUOTE_NONE)
Error:
NameError Traceback (most recent call last)
<ipython-input-9-70ff5701bfb8> in <module>
9 df1 = df[0] + ' ' + df[1] + ' ' + df[2]
10
---> 11 df1.to_csv('out_file3.txt', index=False, header=False, quoting=csv.QUOTE_NONE)
> NameError: name 'csv' is not defined
You're missing the csv package import:
import csv # <- HERE!
from pandas import DataFrame
import pandas as pd
# Load the sheet with no header row, glue the first three columns together
# with single spaces, and write one unquoted line per row.
filename = r'In_file.xlsx'
df = pd.read_excel(filename, header=None)
first_col, second_col, third_col = df[0], df[1], df[2]
df1 = first_col + ' ' + second_col + ' ' + third_col
df1.to_csv(
    'out_file3.txt',
    quoting=csv.QUOTE_NONE,
    header=False,
    index=False,
)
I am extracting selected pages from a PDF file and want to name each DataFrame after the page it was extracted from:
# Attempt to read each selected page into a DataFrame whose variable name
# carries the page number.
file = "abc"
selected_pages = ['10','11'] #can be any combination eg ['6','14','20]
# NOTE(review): selected_pages is a list, yet it is called with () here.
for i in selected_pages():
# NOTE(review): df{str(i)} is not valid Python syntax for an assignment target.
df{str(i)} = read_pdf(path + file + ".pdf",encoding = 'ISO-8859-1', stream = True,area = [100,10,740,950],pages= (i), index = False)
print (df{str(i)} )
The idea, ultimately, as in the example above, is to end up with DataFrames named df10 and df11. I have tried "df" + str(i), "df" & str(i) and df{str(i)}; however, all of them give the error message: SyntaxError: invalid syntax
Or any better way of doing it is most welcome. thanks
This is where a dictionary would be a much better option.
Also note the error you have at the start of the loop. selected_pages is a list, so you can't do selected_pages().
# Read every selected page into a dict of DataFrames keyed by page number,
# then tidy up one of them.
file = "abc"
selected_pages = ['10', '11']  # can be any combination eg ['6','14','20]
df = {}
for i in selected_pages:
    df[i] = read_pdf(
        path + file + ".pdf",
        encoding='ISO-8859-1',
        stream=True,
        area=[100, 10, 740, 950],
        pages=(i),
        index=False,
    )

# After the loop, i still holds the last page ('11'); stepping back one
# page selects the frame stored under '10'.
i = int(i) - 1  # this will bring it to 10
dfB = df[str(i)]

# select row number to drop: 0:4
rows_to_drop = dfB.index[:4]
dfB.drop(rows_to_drop, axis=0, inplace=True)
dfB.columns = ['col' + str(n) for n in range(1, 6)]
I have several files (yml, tf, xml) for which I need to find a string i.e. var1, and then insert a new line with foo2, the rest of the line is unchanged.
Example
# Example input file: the line containing var1 is the one to search for.
variable "my_vars" {
type = "map"
default = {
var1 = "10.48.225.160/28"
var2 = "10.48.225.160/28"
var3 = "10.48.225.160/28"
var4 = "10.48.225.160/28"
}
}
I tried the code below but I need the edit in place.
import sys
import string
# Print the lines of infile; after each line containing substr, a copy of
# that line with substr replaced by replstr is inserted.
# NOTE(review): nothing is written back to infile and the handle is never
# closed in this version.
def find(substr, replstr, infile):
f = open(infile,"rw")
lines = f.readlines()
# The range is computed once, before any line is inserted into the list.
for i in range(len(lines)):
if substr in lines[i]:
j = string.replace(lines[i], substr, replstr)
lines.insert(i + 1, j)
# Lines from readlines() keep their own newline, so joining with "\n"
# puts an extra blank line between them.
print "\n".join(lines)
# Command-line driver: argv[1] is the text to look for, argv[2] is the
# replacement text, argv[3] is the file to process.
old_env = sys.argv[1]
new_env = sys.argv[2]
file = sys.argv[3]
find(old_env, new_env, file)
import sys
import string
def find(substr, replstr, infile):
    """Duplicate every line of *infile* that contains *substr*, in place.

    Each matching line is kept unchanged and followed by a copy of itself in
    which *substr* is replaced by *replstr*. The resulting text is printed
    and then written back to *infile*.

    Every original line is examined exactly once, so a match on the last
    line is handled, and an inserted line is never re-scanned (which matters
    when *replstr* itself contains *substr*).
    """
    with open(infile, "r") as f:
        lines = f.readlines()

    result = []
    for line in lines:
        result.append(line)
        if substr in line:
            if not line.endswith("\n"):
                # Last line without a trailing newline: terminate it so the
                # inserted copy starts on a line of its own.
                result[-1] = line = line + "\n"
            result.append(line.replace(substr, replstr))

    text = "".join(result)
    print(text)
    with open(infile, "w") as f:
        f.write(text)
# Command-line driver: <script> <old_text> <new_text> <file>.
# Guarded so that importing this module does not rewrite any file.
if __name__ == "__main__":
    if len(sys.argv) < 4:
        # Exit with a usage hint instead of a bare IndexError traceback.
        sys.exit("usage: %s <old_text> <new_text> <file>" % sys.argv[0])
    old_env = sys.argv[1]
    new_env = sys.argv[2]
    file = sys.argv[3]
    find(old_env, new_env, file)
The one caveat is there is a match on the last line of the file, the iterator will miss this.
I have 3 column csv file, with column headings {id, ingredients, recipe}
Now, I want to create a dictionary in a way, id is the key and ingredients and recipe is the value.
When combining ingredients and recipe, I need to insert a fullstop and a whitespace as well.
e.g., <ingredient>. <recipe>
My current code is as follows.
# Attempt to map each id to the text "<ingredient>. <recipe>".
# The input is read as tab-delimited with a header row.
input_data = pd.read_csv( input_file, header=0, delimiter="\t", quoting=3 )
# L holds the combined "<ingredient>. <recipe>" text for every row.
L= input_data["ingredient"] + '. ' + input_data["recipe"]
# NOTE(review): L contains the combined values, yet it is used here as a
# column selector on the re-indexed frame.
my_d = input_data.set_index('id')[L].to_dict()
Please help me!!
Use zip with dict:
# Pair every id with its "<ingredient>. <recipe>" text and collect the
# pairs into a dict.
recipe_ids = input_data['id']
recipe_texts = input_data["ingredient"] + '. ' + input_data["recipe"]
my_d = dict(zip(recipe_ids, recipe_texts))
Sample:
# Small three-row sample frame used to demonstrate the mapping.
input_data = pd.DataFrame(
    {
        'ingredient': ['a', 'b', 'g'],
        'id': [1, 2, 4],
        'recipe': ['r', 't', 'h'],
    }
)
print(input_data)
id ingredient recipe
0 1 a r
1 2 b t
2 4 g h
# Build {id: "<ingredient>. <recipe>"} from the frame and show it.
sample_ids = input_data['id']
sample_texts = input_data["ingredient"] + '. ' + input_data["recipe"]
my_d = dict(zip(sample_ids, sample_texts))
print(my_d)
{1: 'a. r', 2: 'b. t', 4: 'g. h'}
How do I get a list of (recently) failed jobs (failed=100 or exit_status=137) from the SGE? From the qacct help:
[-j [job_id|job_name|pattern]] list all [matching] jobs
How do I use the pattern? I tried the following, does not work.
qacct -j failed=100
"pattern" in this case refers to a simple globbing expression to match against a job name, e.g. qacct -j 'myjob*'
qacct unfortunately doesn't have the filtration capability you're looking for - it's possible to filter on complex job attributes, but not fundamental ones like exit_status or failed.
You CAN retrieve that information from the SGE accounting file(assuming you have access to it) with just a little work. When SGE finishes a job, it writes out a simple record to $SGE_ROOT/$SGE_CELL/common/accounting - this is the file that qacct reads. You'll want to check the accounting(5) man page on your qmaster for details specific to your GridEngine version, but a job record in your accounting file should more or less look like this:
all.q:myexechost:group:user:myjobstep16:1126971:sge:0:1369755166:1369768897:1369769771:0:0:874:796.564903:30.676336:15788.000000:0:0:0:0:17009:2:0:47987400.000000:34033048:0:0:0:9468:27604:NONE:defaultdepartment:NONE:1:0:827.241239:96.445328:39.111400:-q all.q:0.000000:NONE:237133824.000000:0:0
In this particular record, failed and exit_status are the 12th and 13th fields, respectively. For a quick and dirty "recent failures" list, we can use these along with fields 6(job id) and 11(job end time) like so to reveal any failures in the most recent 100 jobs:
# Keep fields 6, 11, 12 and 13 (job id, end time, failed, exit_status), sort from the end-time field onward, take the last 100 records, and show those whose failed field is 100 and whose exit_status starts with 137.
$ cut -d':' -f6,11,12,13 $SGE_ROOT/$SGE_CELL/common/accounting|sort -t':' -k2|tail -100|grep ':100:137'
I wrote a python script to parse the accounting file for failed jobs. You should edit it to your own use.
#!/usr/local/bin/python2.7
import os
from sys import *
import sys
import getopt
import datetime
#Variables
program = "parse_acct.py"
ifile = "/local/cluster/sge/default/common/accounting"

# Label printed in front of each colon-separated field of an accounting
# record, in the order the fields appear on the line.
FIELD_LABELS = [
    "qname ",
    "hostname ",
    "group ",
    "owner ",
    "job_name ",
    "job_number ",
    "account ",
    "priority ",
    "submission_time ",
    "start_time ",
    "end_time ",
    "failed ",
    "exit_status ",
    "ru_wallclock ",
    " ru_utime ",
    " ru_stime ",
    " ru_maxrss ",
    " ru_ixrss ",
    " ru_ismrss ",
    " ru_idrss ",
    " ru_isrss ",
    " ru_minflt ",
    " ru_majflt ",
    " ru_nswap ",
    " ru_inblock ",
    " ru_oublock ",
    " ru_msgsnd ",
    " ru_msgrcv ",
    " ru_nsignals ",
    " ru_nvcsw ",
    " ru_nivcsw ",
    "project ",
    "department ",
    "granted_pe ",
    "slots ",
    "task_number ",
    "cpu ",
    "mem ",
    "io ",
    "category ",
    "iow ",
    "pe_taskid ",
    "maxvmem ",
    "arid ",
    "ar_submission_time ",
]

# Fields printed as local date/time instead of raw epoch seconds.
TIMESTAMP_FIELDS = (8, 9, 10)

# Positions of the fields the filters look at.
HOSTNAME_FIELD = 1
OWNER_FIELD = 3
FAILED_FIELD = 11

# Lines with fewer fields than this are not treated as job records.
MIN_FIELDS = 10

SEPARATOR = "=============================================================="


def parse_args(argv):
    """Parse the command-line options (without the program name).

    Options:
        -i <file>   accounting file to read (defaults to ``ifile``)
        -f <code>   keep only records whose ``failed`` field equals <code>
        -u <user>   keep only records whose owner is <user>
        -n <node>   keep only records whose hostname is <node>
        -t/-b/-e    accepted with a value for backward compatibility, but
                    no time-based filtering is implemented
        -h          print the usage line and exit

    Returns a dict with the keys ``ifile``, ``failed``, ``user`` and
    ``node``; a filter that was not requested is ``None``.
    Exits with status 2 on an unknown option or a missing option value.
    """
    usage = program + " -i <input> -u <username> -n <node_name> -f"
    settings = {"ifile": ifile, "failed": None, "user": None, "node": None}
    try:
        myopts, _args = getopt.getopt(argv, "i:f:n:t:u:b:e:h")
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)

    ###############################
    # o == option
    # a == argument passed to the o
    ###############################
    for o, a in myopts:
        if o == '-h':
            print(usage)
            sys.exit(0)
        elif o == '-i':
            settings["ifile"] = a
        elif o == '-f':
            settings["failed"] = a
        elif o == '-u':
            settings["user"] = a
        elif o == '-n':
            settings["node"] = a
        # -t, -b and -e fall through: parsed, but intentionally unused.
    return settings


def read_records(path):
    """Return the accounting records in *path* as lists of string fields.

    Each line is split on ':'; lines with fewer than MIN_FIELDS fields are
    skipped.
    """
    records = []
    with open(path, "r") as f:
        for raw in f:
            fields = raw.rstrip().split(":")
            if len(fields) >= MIN_FIELDS:
                records.append(fields)
    return records


def keep_record(fields, failed=None, user=None, node=None):
    """Return True when *fields* passes every requested filter.

    A filter left as ``None`` is not applied. All comparisons are exact
    string comparisons against the raw field text, so ``failed`` must be
    given as a string such as ``"100"``. A record too short to carry a
    ``failed`` field never passes the ``failed`` filter.
    """
    if failed is not None:
        if len(fields) <= FAILED_FIELD or fields[FAILED_FIELD] != failed:
            return False
    if user is not None and fields[OWNER_FIELD] != user:
        return False
    if node is not None and fields[HOSTNAME_FIELD] != node:
        return False
    return True


def format_record(fields):
    """Return the printable lines for one record, separator line first.

    Only the fields actually present are listed, so a record with fewer
    fields than FIELD_LABELS is printed as far as it goes; fields beyond
    the known labels are ignored.
    """
    lines = [SEPARATOR]
    for idx, (label, value) in enumerate(zip(FIELD_LABELS, fields)):
        if idx in TIMESTAMP_FIELDS:
            stamp = datetime.datetime.fromtimestamp(int(value))
            value = stamp.strftime('%Y-%m-%d %H:%M:%S')
        lines.append(label + value)
    return lines


def main(argv=None):
    """Print every accounting record that passes the requested filters."""
    if argv is None:
        argv = sys.argv[1:]
    settings = parse_args(argv)
    for fields in read_records(settings["ifile"]):
        if keep_record(fields, settings["failed"], settings["user"],
                       settings["node"]):
            print("\n".join(format_record(fields)))


if __name__ == "__main__":
    main()