This is my code:
!pip install yfinance
import yfinance as yf
from pandas_datareader import data
import matplotlib.pyplot as plt
import pandas as pd
import datetime as dt
import urllib.request, json
import os
import numpy as np
data = yf.download('AAPL', period='max', interval='5d')
# find the log return which is equal to log(1+ri)
data['LogReturn'] = np.log(data['Close']).diff()
data['LogReturn'] = data['LogReturn'].shift(-1)
Fast = 10
Slow = 30
data['SlowSMA'] = data['Close'].rolling(Slow).mean()
data['FastSMA'] = data['Close'].rolling(Fast).mean()
data['Signal'] = np.where(data['FastSMA'] >= data['SlowSMA'], 1, 0)
data['PrevSignal'] = data['Signal'].shift(1)
data['Buy'] = (data['PrevSignal'] == 0) & (data['Signal'] == 1)
data['Sell'] = (data['PrevSignal'] == 1) & (data['Signal'] == 0)
def assign_is_invested(row):  # we will look at each row
    global is_invested  # we can change it outside the function
    if is_invested and row['Sell']:
        is_invested = False
    if not is_invested and row['Buy']:
        is_invested = True
        return is_invested
data['IsInvested'] = data.apply(assign_is_invested, axis=1)
When I run the above function, the IsInvested column comes back as None, but I am expecting True or False. Why is that?
I believe that the problem is with the indentation of the return.
is_invested = False  # we are not invested yet, no money
def assign_is_invested(row):  # we will look at each row
    global is_invested  # we can change it outside the function
    if is_invested and row['Sell']:
        is_invested = False
    if not is_invested and row['Buy']:
        is_invested = True
    return is_invested
data['IsInvested'] = data.apply(assign_is_invested, axis=1)
Because the return was indented inside the second if block, the function only returned is_invested when that condition matched; every other row fell off the end of the function and implicitly returned None.
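As a side note, the same IsInvested column can be built without apply by marking the Buy/Sell events and forward-filling the state in between. This is only a sketch of an alternative, not part of the original answer:

state = np.where(data['Buy'], 1.0, np.where(data['Sell'], 0.0, np.nan))  # 1 on buys, 0 on sells, NaN elsewhere
data['IsInvested'] = pd.Series(state, index=data.index).ffill().fillna(0).astype(bool)  # carry the last event forward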
I am dealing with a simple loop.
I have a somewhat larger dataframe and I would like to make better use of the processor (it currently sits at about 2% utilization).
I tried this:
import pandas as pd
import numpy as np
import time
from concurrent.futures import ThreadPoolExecutor
scan = pd.DataFrame([[0,2,3,5],[4,2,7,7], [5,6,2,3]], columns=['st1','nd1','st2','nd2'])
def task(value):
    calc_all = pd.DataFrame()
    for i in range(0, 3, 2):
        j = i + 1
        calc = pd.concat([pd.DataFrame(scan.iloc[:, i]), pd.DataFrame(scan.iloc[:, j])], axis=1)
        calc['th'] = calc.iloc[:, 0] + calc.iloc[:, 1]
        calc_all = pd.concat([calc_all, calc], axis=1)
        time.sleep(1)  # tested time
    return calc_all
if __name__ == '__main__':
    with ThreadPoolExecutor(2) as exe:
        for result in exe.map(task, range(2)):
            print(result)
It's not faster. What did I do wrong?
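Two things work against a speed-up here: task ignores its value argument and processes every column pair itself, so both workers repeat the same full job, and threads share the GIL, so pure Python/pandas loops rarely get faster in a ThreadPoolExecutor. Below is only a sketch of one way to split the work instead, handing each worker its own pair of columns via processes (assuming the real workload is heavier than this toy example):

from concurrent.futures import ProcessPoolExecutor

def pair_task(i):
    # work on one pair of columns (i, i + 1) only
    calc = scan.iloc[:, [i, i + 1]].copy()
    calc['th'] = calc.iloc[:, 0] + calc.iloc[:, 1]
    return calc

if __name__ == '__main__':
    with ProcessPoolExecutor(2) as exe:
        results = list(exe.map(pair_task, range(0, 3, 2)))  # i = 0 and i = 2
    calc_all = pd.concat(results, axis=1)
    print(calc_all)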
I am trying to read data from the following link into a data frame without saving it locally (this is important). I figured out a way (below), but is there a more efficient way to do this?
from urllib.request import urlopen
import pandas as pd
from io import StringIO
from matplotlib.dates import DateFormatter
from datetime import datetime
uri = 'https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?station=AXA&data=all&year1=2022&month1=12&day1=1&year2=2022&month2=12&day2=1&tz=Etc%2FUTC&format=onlycomma&latlon=no&elev=no&missing=M&trace=T&direct=no&report_type=3&report_type=4'
data = urlopen(uri, timeout=300).read().decode("utf-8")
dateparse = lambda x: datetime.strptime(x.strip(), '%Y-%m-%d %H:%M')
str1 = data.split('\n')
dfList = []
for ii in range(1, len(str1)):
    if len(str1[ii]) > 0:
        df1 = pd.read_csv(StringIO(str1[ii]), parse_dates=[1], date_parser=dateparse, header=None)  # read each line into a dataframe
        if not df1.empty:
            df2 = df1.iloc[:, 0:3]  # keep the first three columns (station, date, temperature)
            if df2.iloc[0, -1] != 'M':  # don't append the rows with missing data
                dfList.append(df2)
df = pd.concat(dfList, axis=0, ignore_index=True)
df.columns = ['Station','Date','Temp']
ax1 = df.plot(x=1,y=2)
ax1.get_figure().autofmt_xdate()
Using requests, pandas and io:
from io import StringIO
import pandas as pd
import requests
url = (
"https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?"
"station=AXA&data=all&year1=2022&month1=12&day1=1&year2=2022&"
"month2=12&day2=1&tz=Etc%2FUTC&format=onlycomma&latlon=no&"
"elev=no&missing=M&trace=T&direct=no&report_type=3&report_type=4"
)
with requests.Session() as session:
    response = session.get(url, timeout=30)
    response.raise_for_status()  # raises an HTTPError for 4xx/5xx responses
    df = pd.read_csv(StringIO(response.text), sep=",")

print(df)
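As an aside, pandas can usually fetch an HTTP(S) URL directly, so if no custom headers, retries, or status handling are needed, the whole thing can collapse to a single call:

df = pd.read_csv(url)  # pandas downloads the CSV itself; nothing is saved locally
print(df)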
I am trying to get an anchored VWAP from a specific date using pandas_ta. How do I set the anchor to a specific date?
import pandas as pd
import yfinance as yf
import pandas_ta as ta
from datetime import datetime, timedelta, date
import warnings
import plac
data = yf.download("aapl", start="2021-07-01", end="2022-08-01")
df = pd.DataFrame(data)
df1 = df.ta.vwap(anchor = "D")
df14 = pd.concat([df, df1],axis=1)
print(df14)
pandas_ta.vwap anchors depending on the index values, as the pandas-ta documentation says (reference):
anchor (str): How to anchor VWAP. Depending on the index values, it will
implement various Timeseries Offset Aliases as listed here:
https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timeseries-offset-aliases
Default: "D".
In other words, you can't specify an arbitrary date the way TradingView does. To anchor to a date ourselves:
import pandas as pd
import numpy as np
import yfinance as yf
import pandas_ta as ta
# set anchor date
anchored_date = pd.to_datetime('2022-01-30')
data = yf.download("aapl", start="2022-01-01", end="2022-08-01")
df = pd.DataFrame(data)
df1 = df.ta.vwap(anchor = "D")
df14 = pd.concat([df, df1],axis=1)
# create a 'typical_price' column; on daily bars it should match 'VWAP_D'
df14['typical_price'] = (df14['High'] + df14['Low'] + df14['Close']) / 3
tpp_d = ((df14['High'] + df14['Low'] + df14['Close']) * df14['Volume']) / 3
anchored = df14.index >= anchored_date
df14['anchored_VWAP'] = (
    tpp_d.where(anchored).groupby(anchored).cumsum()
    / df14['Volume'].where(anchored).groupby(anchored).cumsum()
)
df14
Plot
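If this is needed for more than one anchor date, the same arithmetic fits naturally into a small helper function. This is a hypothetical convenience wrapper (not part of pandas_ta); it should give the same column as above:

def anchored_vwap(ohlcv, anchor_date):
    # cumulative typical-price VWAP from anchor_date onward; NaN before the anchor
    mask = ohlcv.index >= pd.to_datetime(anchor_date)
    tp_vol = (ohlcv['High'] + ohlcv['Low'] + ohlcv['Close']) / 3 * ohlcv['Volume']
    return tp_vol.where(mask).cumsum() / ohlcv['Volume'].where(mask).cumsum()

df14['anchored_VWAP_alt'] = anchored_vwap(df, anchored_date)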
I'm using a csv file as my data source. I want the graph to update based on the radio button selection I make; please find my source code below.
import pandas as pd
import numpy as np
import plotly.graph_objs as go
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input,Output
df = pd.read_csv('population2.csv')
fst_yvalues = df['PopEstimate2010']/1000000
scd_yvalues = df['PopEstimate2011']/1000000
trd_yvalues = df['PopEstimate2012']/1000000
app = dash.Dash()
app.layout = html.Div(children=[
    html.H1('My first Interactive Graph'),
    html.Div(dcc.RadioItems(id='radio_items',
                            options=[{'label': 'PopEstimate2010', 'value': 'pop2010'},
                                     {'label': 'PopEstimate2011', 'value': 'pop2011'},
                                     {'label': 'PopEstimate2012', 'value': 'pop2012'}],
                            value='pop2010')),
    html.Br(),
    html.Div(children=[
        dcc.Graph(id='int_bar')])])
@app.callback(Output('int_bar', 'figure'), [Input('radio_items', 'value')])
def bar_chart(value):
    trace = []
    if value == 'pop2010':
        trarce = [go.Bar(x=df['Name'], y=fst_yvalues)]
    elif value == 'pop2011':
        trarce = [go.Bar(x=df['Name'], y=scd_yvalues)]
    else:
        trarce = [go.Bar(x=df['Name'], y=trd_yvalues)]
    layout = go.Layout(title='MY FIRST GRAPH',
                       xaxis=dict(title='MY X-AXIS'),
                       yaxis=dict(title='MY Y-AXIS'), hovermode='closest')
    figure = go.Figure(data=trace, layout=layout)
    return figure
if __name__ == '__main__':
    app.run_server(debug=True)
When I try to run this, it only gives me the layout but not the actual graph.
Below is the approach I have taken to get this to work:
@app.callback(Output('int_bar', 'figure'), [Input('radio_items', 'value')])
def make_bar_chart(value):
    if value == 'pop2010':
        figure = {'data': [go.Bar(x=df['Name'], y=fst_yvalues)],
                  'layout': go.Layout(title='MY FIRST GRAPH',
                                      xaxis=dict(title='MY X-AXIS'),
                                      yaxis=dict(title='MY Y-AXIS'), hovermode='closest')
                  }
    ...
    ...
    return figure
if __name__ == '__main__':
    app.run_server(debug=True)
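A more compact variant (a sketch, not the original answer) maps the radio values to the precomputed series once, so the callback needs no branching:

series_by_value = {'pop2010': fst_yvalues, 'pop2011': scd_yvalues, 'pop2012': trd_yvalues}

@app.callback(Output('int_bar', 'figure'), [Input('radio_items', 'value')])
def make_bar_chart(value):
    return go.Figure(data=[go.Bar(x=df['Name'], y=series_by_value[value])],
                     layout=go.Layout(title='MY FIRST GRAPH',
                                      xaxis=dict(title='MY X-AXIS'),
                                      yaxis=dict(title='MY Y-AXIS'),
                                      hovermode='closest'))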
Does anyone know how one goes about enabling the REFS_OK flag in numpy? I cannot seem to find a clear explanation online.
My code is:
import sys
import string
import numpy as np
import pandas as pd
SNP_df = pd.read_csv('SNPs.txt', sep='\t', index_col=None, header=None, nrows=101)
output = open('100 SNPs.fa', 'a')
for i in SNP_df:
    data = SNP_df[i]
    data = np.array(data)
    for j in np.nditer(data):
        if j == 0:
            output.write(("\n>%s\n") % (str(data(j))))
        else:
            output.write(data(j))
I keep getting the error message: Iterator operand or requested dtype holds references, but the REFS_OK flag was not enabled.
I cannot work out how to enable the REFS_OK flag so the program can continue...
I have isolated the problem. There is no need to use np.nditer. The main problem was that I misinterpreted how Python reads the iteration variable in a for loop. The corrected code is below.
import sys
import string
import fileinput
import numpy as np
import pandas as pd  # needed for pd.read_csv below
SNP_df = pd.read_csv('datafile.txt', sep='\t', index_col=None, header=None, nrows=5000)
output = open('outputFile.fa', 'a')
for i in range(1, 51):
    data = SNP_df[i]
    data = np.array(data)
    for j in range(0, 1):
        output.write(("\n>%s\n") % (str(data[j])))
    for k in range(1, len(data)):
        output.write(str(data[k]))
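For reference, iterating over a DataFrame yields its column labels, not its rows, which is the misreading mentioned above. A tiny illustration with made-up data:

demo = pd.DataFrame({'a': [1, 2], 'b': [3, 4]})
for col in demo:
    print(col)        # prints the column label: 'a', then 'b'
    print(demo[col])  # the full column as a Series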
If you really want to enable the flag, I have a working example.
Python 2.7, numpy 1.14.2, pandas 0.22.0
import pandas as pd
import numpy as np
# get all data as a pandas DataFrame
data = pd.read_csv("./monthdata.csv")
print(data)
# get values as numpy array
data_ar = data.values # numpy.ndarray, every element is a row
for row in data_ar:
    print(row)
    sum = 0
    count = 0
    for month in np.nditer(row, flags=["refs_ok"], op_flags=["readwrite"]):
        print month
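For anyone on Python 3, a minimal self-contained sketch of the same flag on an object-dtype array (object dtype is what forces refs_ok in the first place):

import numpy as np

arr = np.array(['A', 'C', 'G', 'T'], dtype=object)  # object dtype holds Python references
for item in np.nditer(arr, flags=["refs_ok"]):
    print(item)  # each item is a 0-d array wrapping the original string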