how to make R datafile to Python type - pandas

I want to make R datatype to Python datatype below is the whole code
def convert_datafiles(datasets_folder):
import rpy2.robjects
rpy2.robjects.numpy2ri.activate()
pandas2ri.activate()
for root, dirs, files in os.walk(datasets_folder):
for name in files:
# sort out .RData files
if name.endswith('.RData'):
name_ = os.path.splitext(name)[0]
name_path = os.path.join(datasets_folder, name_)
# creat sub-directory
if not os.path.exists(name_path):
os.makedirs(name_path)
file_path = os.path.join(root, name)
robj = robjects.r.load(file_path)
# check out subfiles in the data frame
for var in robj:
###### error happend right here
myRData = pandas2ri.ri2py_dataframe( var )
####error happend right here
# convert to DataFrame
if not isinstance(myRData, pd.DataFrame):
myRData = pd.DataFrame(myRData)
var_path = os.path.join(datasets_folder,name_,var+'.csv')
myRData.to_csv(var_path)
os.remove(os.path.join(datasets_folder, name)) # clean up
print ("=> Success!")
I want to make R datatype to pythone type, but the error keeps popping up like this : AttributeError: 'str' object has no attribute 'dtype'
How should I do to resolve this error?

The rpy2 documentation is somewhat incomplete when it comes to interaction with pandas, but unit tests will provide examples of conversion. For example:
rdataf = robjects.r('data.frame(a=1:2, '
' b=I(c("a", "b")), '
' c=c("a", "b"))')
with localconverter(default_converter + rpyp.converter) as cv:
pandas_df = robjects.conversion.ri2py(rdataf)

Related

How to avoid "missing input files" error in Snakemake's "expand" function

I get a MissingInputException when I run the following snakemake code:
import re
import os
glob_vars = glob_wildcards(os.path.join(os.getcwd(), "inputs","{fileName}.{ext}"))
rule end:
input:
expand(os.path.join(os.getcwd(), "inputs", "{fileName}_rename.fas"), fileName=glob_vars.fileName)
rule rename:
'''
rename fasta file to avoid problems
'''
input:
expand("inputs/{{fileName}}.{ext}", ext=glob_vars.ext)
output:
os.path.join(os.getcwd(), "inputs", "{fileName}_rename.fas")
run:
list_ = []
with open(str(input)) as f2:
line = f2.readline()
while line:
while not line.startswith('>') and line:
line = f2.readline()
fas_name = re.sub(r"\W", "_", line.strip())
list_.append(fas_name)
fas_seq = ""
line = f2.readline()
while not line.startswith('>') and line:
fas_seq += re.sub(r"\s","",line)
line = f2.readline()
list_.append(fas_seq)
with open(str(output), "w") as f:
f.write("\n".join(list_))
My Inputs folder contains these files:
G.bullatarudis.fasta
goldfish_protein.faa
guppy_protein.faa
gyrodactylus_salaris.fasta
protopolystoma_xenopodis.fa
salmon_protein.faa
schistosoma_mansoni.fa
The error message is:
Building DAG of jobs...
MissingInputException in line 10 of /home/zhangdong/works/NCBI/BLAST/RHB/test.rule:
Missing input files for rule rename:
inputs/guppy_protein.fasta
inputs/guppy_protein.fa
I assumed that the error is caused by expand function, because only guppy_protein.faa file exists, but expand also generate guppy_protein.fasta and guppy_protein.fa files. Are there any solutions?
By default, expand will produce all combinations of the input lists, so this is expected behavior. You need your input to lookup the proper extension given a fileName. I haven't tested this:
glob_vars = glob_wildcards(os.path.join(os.getcwd(), "inputs","{fileName}.{ext}"))
# create a dict to lookup extensions given fileNames
glob_vars_dict = {fname: ex for fname, ex in zip(glob_vars.fileName, glob_vars.ext)}
def rename_input(wildcards):
ext = glob_vars_dict[wildcards.fileName]
return f"inputs/{wildcards.fileName}.{ext}"
rule rename:
input: rename_input
A few unsolicited style comments:
You don't have to prepend your glob_wildcards with the os.getcwd, glob_wildcards("inputs", "{fileName}.{ext}")) should work as snakemake uses paths relative to the working directory by default.
Try to stick with snake_case instead of camalCase for your variable names in python
In this case, fileName isn't a great descriptor of what you are capturing. Maybe species_name or species would be clearer
Thanks to Troy Comi, I modified my code and it worked:
import re
import os
import itertools
speciess,exts = glob_wildcards(os.path.join(os.getcwd(), "inputs_test","{species}.{ext}"))
rule end:
input:
expand("inputs_test/{species}_rename.fas", species=speciess)
def required_files(wildcards):
list_combination = itertools.product([wildcards.species], list(set(exts)))
exist_file = ""
for file in list_combination:
if os.path.exists(f"inputs_test/{'.'.join(file)}"):
exist_file = f"inputs_test/{'.'.join(file)}"
return exist_file
rule rename:
'''
rename fasta file to avoid problems
'''
input:
required_files
output:
"inputs_test/{species}_rename.fas"
run:
list_ = []
with open(str(input)) as f2:
line = f2.readline()
while line:
while not line.startswith('>') and line:
line = f2.readline()
fas_name = ">" + re.sub(r"\W", "_", line.replace(">", "").strip())
list_.append(fas_name)
fas_seq = ""
line = f2.readline()
while not line.startswith('>') and line:
fas_seq += re.sub(r"\s","",line)
line = f2.readline()
list_.append(fas_seq)
with open(str(output), "w") as f:
f.write("\n".join(list_))

How can I use a loop to apply a function to a list of csv files?

I'm trying to loop through all files in a directory and add "indicator" data to them. I had the code working where I could select 1 file and do this, but now am trying to make it work on all files. The problem is when I make the loop it says
ValueError: Invalid file path or buffer object type: <class 'list'>
The goal would be for each loop to read another file from list, make changes, and save file back to folder with changes.
Here is complete code w/o imports. I copied 1 of the "file_path"s from the list and put in comment at bottom.
### open dialog to select file
#file_path = filedialog.askopenfilename()
###create list from dir
listdrs = os.listdir('c:/Users/17409/AppData/Local/Programs/Python/Python38/Indicators/Sentdex Tutorial/stock_dfs/')
###append full path to list
string = 'c:/Users/17409/AppData/Local/Programs/Python/Python38/Indicators/Sentdex Tutorial/stock_dfs/'
listdrs_path = [ string + x for x in listdrs]
print (listdrs_path)
###start loop, for each "file" in listdrs run the 2 functions below and overwrite saved csv.
for file in listdrs_path:
file_path = listdrs_path
data = pd.read_csv(file_path, index_col=0)
########################################
####function 1
def get_price_hist(ticker):
# Put stock price data in dataframe
data = pd.read_csv(file_path)
#listdr = os.listdir('Users\17409\AppData\Local\Programs\Python\Python38\Indicators\Sentdex Tutorial\stock_dfs')
print(listdr)
# Convert date to timestamp and make index
data.index = data["Date"].apply(lambda x: pd.Timestamp(x))
data.drop("Date", axis=1, inplace=True)
return data
df = data
##print(data)
######Indicator data#####################
def get_indicators(data):
# Get MACD
data["macd"], data["macd_signal"], data["macd_hist"] = talib.MACD(data['Close'])
# Get MA10 and MA30
data["ma10"] = talib.MA(data["Close"], timeperiod=10)
data["ma30"] = talib.MA(data["Close"], timeperiod=30)
# Get RSI
data["rsi"] = talib.RSI(data["Close"])
return data
#####end functions#######
data2 = get_indicators(data)
print(data2)
data2.to_csv(file_path)
###################################################
#here is an example of what path from list looks like
#'c:/Users/17409/AppData/Local/Programs/Python/Python38/Indicators/Sentdex Tutorial/stock_dfs/A.csv'
The problem is in line number 13 and 14. Your filename is in variable file but you are using file_path which you've assigned the file list. Because of this you are getting ValueError. Try this:
### open dialog to select file
#file_path = filedialog.askopenfilename()
###create list from dir
listdrs = os.listdir('c:/Users/17409/AppData/Local/Programs/Python/Python38/Indicators/Sentdex Tutorial/stock_dfs/')
###append full path to list
string = 'c:/Users/17409/AppData/Local/Programs/Python/Python38/Indicators/Sentdex Tutorial/stock_dfs/'
listdrs_path = [ string + x for x in listdrs]
print (listdrs_path)
###start loop, for each "file" in listdrs run the 2 functions below and overwrite saved csv.
for file_path in listdrs_path:
data = pd.read_csv(file_path, index_col=0)
########################################
####function 1
def get_price_hist(ticker):
# Put stock price data in dataframe
data = pd.read_csv(file_path)
#listdr = os.listdir('Users\17409\AppData\Local\Programs\Python\Python38\Indicators\Sentdex Tutorial\stock_dfs')
print(listdr)
# Convert date to timestamp and make index
data.index = data["Date"].apply(lambda x: pd.Timestamp(x))
data.drop("Date", axis=1, inplace=True)
return data
df = data
##print(data)
######Indicator data#####################
def get_indicators(data):
# Get MACD
data["macd"], data["macd_signal"], data["macd_hist"] = talib.MACD(data['Close'])
# Get MA10 and MA30
data["ma10"] = talib.MA(data["Close"], timeperiod=10)
data["ma30"] = talib.MA(data["Close"], timeperiod=30)
# Get RSI
data["rsi"] = talib.RSI(data["Close"])
return data
#####end functions#######
data2 = get_indicators(data)
print(data2)
data2.to_csv(file_path)
Let me know if it helps.

Can't convert 'bytes' object to str implicitly for DCM to raw file

I learn how to convert DCM file to Raw file .Got the code from Git Hub:
https://github.com/xiasun/dicom2raw/blob/master/dicom2raw.py
And it got a error"Can't convert 'bytes' object to str implicitly" on the line
"allInOne += dataset.PixelData"
I try to use "encode("utf-8")",but it make allInOne to be empty.
By the way ,Is there any code to generate the .mhd file corresponding to the .raw file?
import dicom
import os
import numpy
import sys
dicomPath = "C:/DataLuna16pen/dcmdata/"
lstFilesDCM = [] # create an empty list
for dirName, subdirList, fileList in os.walk(dicomPath):
allInOne = ""
print(subdirList)
i=0
for filename in fileList:
i+=1
if "".join(filename).endswith((".dcm", ".DCM")):
path = dicomPath + "".join(filename)
dataset = dicom.read_file(path)
for n,val in enumerate(dataset.pixel_array.flat):
dataset.pixel_array.flat[n] = val / 60
if val < 0:
dataset.pixel_array.flat[n] = 0
dataset.PixelData = numpy.uint8(dataset.pixel_array).tostring()
allInOne += dataset.PixelData
print ("slice " + "".join(filename) + " done ",end=" ")
print (i)
newFile = open("./all_in_one.raw", "wb")
newFile.write(allInOne)
newFile.close()
print ("RAW file generated")
There are several things:
PyDicom still doesn't read compressed DICOMs properly (loseless jpeg). You should check Transfer Syntax of the files to check if this is the case. As a workaround you can use GDCM tool dcmdjpeg
you should not convert byte array into string (np.array.tostring returns in fact the array of bytes)
for writing mha files, take a look at MedPy. You can also use ITK directly. There is python wrapper and SimpleITK - some kind lightweight modification of ITK

Python ConfigParser Question

Does the Config file for the ConfigParser have to be named "Config.ini" in order to work?
I want the name to be "1Config.ini" so that it appears at the top of a folder dir.
This is what I have currently
config = ConfigParser.ConfigParser()
config.read(Revision[0:Revision.rfind('\\')] + "\1Config.ini")
Type = config.get("myvars", "Type")
I get this error however when the file and code is named "1Config.ini"
<class 'ConfigParser.NoSectionError'>: No section: 'myvars'
What's the output of the following? Make sure it's a valid file name.
>>> print Revision[0:Revision.rfind('\\')] + "\1Config.ini"
Ideally use os.path.join instead of concatenating strings:
import os
filename = os.path.join(Revision[0:Revision.rfind('\\')], "Config.ini")
config.read(filename)
You probably shouldn't name your variable Type, because type is a built-in function/module and it'd be confusing.
Type = config.get("myvars", "Type")
And no, config files can be named anything:
>>> a = ConfigParser.ConfigParser()
>>> a.read("E:/Documents/2012/config.test") # where config.test is the example from the documentation
['E:/Documents/2012/config.test']
>>> a.sections()
['My Section']
>>> a.items(a.sections()[0])
[('foodir', 'frob/whatever'),
('dir', 'frob'),
('long', 'this value continues\nin the next line')]

How do I import xyz and roll/pitch/yaw from csv file to Blender?

I want to know if it is possible to import data of attitude and position (roll/pitch/yaw & xyz) from a comma separated file to Blender?
I recorded data from a little RC car and I want to represent its movement in a 3D world.
I have timestamps too, so if there's a way to animated the movement of the object it'll be superb!!
Any help will be greatly appreciated!!
Best Regards.
A slight modifcation, making use of the csv module
import bpy
import csv
position_vectors = []
filepath = "C:\\Work\\position.log"
csvfile = open(filepath, 'r', newline='')
ofile = csv.reader(csvfile, delimiter=',')
for row in ofile:
position_vectors.append(tuple([float(i) for i in row]))
csvfile.close()
This will get your points into Blender. Note the delimiter parameter in csv.reader, change that accordingly. With a real example file of your RC car we could provide a more complete solution.
For blender v2.62:
If you have a file "positions.log" looking like:
-8.691985196313894e-002; 4.119284642631801e-001; -5.832147659661263e-001
1.037146774956164e+000; 8.137243553005405e-002; -5.703274929662892e-001
-3.602584527944123e-001; 8.378614512537046e-001; 2.615265921163826e-001
6.266465707681335e-001; -1.128416901202341e+000; -1.664644365541639e+000
3.327523280880091e-001; 4.488553740582839e-001; -2.449449085462368e+000
-7.311567199869298e-001; -1.860587923723032e+000; -1.297179602213110e+000
-7.453603745688361e-003; 4.770473577895327e-001; -2.319515785100494e+000
1.935170866863264e-001; -2.010280476717868e+000; 3.748000986190077e-001
5.201529166915653e-001; 3.952972788761738e-001; 1.658581747430548e+000
4.719198263774027e-001; 1.526020825619557e+000; 3.187088567866725e-002
you can read it with this python script in blender (watch out for the indentation!)
import bpy
from mathutils import *
from math import *
from bpy.props import *
import os
import time
# Init
position_vector = []
# Open file
file = open("C:\\Work\\position.log", "r")
# Loop over line in file
for line in file:
# Split line at ";"
splittet_line = line.split(";")
# Append new postion
position_vector.append(
Vector((float(splittet_line[0]),
float(splittet_line[1]),
float(splittet_line[2]))))
# Close file
file.close()
# Get first selected object
selected_object = bpy.context.selected_objects[0]
# Get first selected object
for position in position_vector:
selected_object.location = position
This reads the file and updates the position of the first selected object accordingly. Way forward: What you have to find out is how to set the keyframes for the animation...
Consider this python snippet to add to the solutions above
obj = bpy.context.object
temporalScale=bpy.context.scene.render.fps
for lrt in locRotArray:
obj.location = (lrt[0], lrt[1], lrt[2])
# radians, and do you want XYZ, or ZYX?
obj.rotation_euler = (lrt[3], lrt[4], lrt[5])
time = lrt[6]*temporalScale
obj.keyframe_insert(data_path="location", frame=time)
obj.keyframe_insert(data_path="rotation_euler", frame=time)
I haven't tested it, but it will probably work, and gets you started.
With a spice2xyzv file as input file. The script writed by "Mutant Bob" seems to work.
But the xyz velocity data are km/s not euler angles, I think, and the import does not work for the angles.
# Records are <jd> <x> <y> <z> <vel x> <vel y> <vel z>
# Time is a TDB Julian date
# Position in km
# Velocity in km/sec
2456921.49775 213928288.518 -446198013.001 -55595492.9135 6.9011736 15.130842 0.54325805
Is there a solution to get them in Blender? Should I convert velocity angle to euler, is that possible in fact?
I use this script :
import bpy
from mathutils import *
from math import *
from bpy.props import *
import os
import time
# Init
position_vector = []
# Open file
file = open("D:\\spice2xyzv\\export.xyzv", "r")
obj = bpy.context.object
temporalScale=bpy.context.scene.render.fps
for line in file:
# Split line at ";"
print("line = %s" % line)
line = line.replace("\n","")
locRotArray = line.split(" ")
print("locRotArray = %s" % locRotArray )
#for lrt in locRotArray:
print(locRotArray[1])
obj.location = (float(locRotArray[1]), float(locRotArray[2]), float(locRotArray[3]))
# radians, and do you want XYZ, or ZYX?
obj.rotation_euler = (float(locRotArray[4]), float(locRotArray[5]), float(locRotArray[5]))
time = float(locRotArray[0])*temporalScale
print("time = %s" % time)
obj.keyframe_insert(data_path="location", frame=time)
obj.keyframe_insert(data_path="rotation_euler", frame=time)