In Middleman how do I generate day, month, and year blog archive links? - middleman

I'm trying to generate year and month archive links.
Right now I can set one of the three.
When I set two or more and look at the Sitemap I get the error:
NoMethodError at /__middleman/sitemap/
undefined method `add_path' for #<Middleman::MetaPages::SitemapResource:0x007fedd61597a0>`
Here is my blog from config.rb
activate :blog do |blog|
blog.name = "blog"
blog.permalink = "/{title}/"
blog.taglink = "blog/:tag.html"
blog.year_link = "blog/{year}/"
blog.month_link = "blog/{year}/{month}/"
blog.day_link = "blog/{year}/{month}/{day}/"
blog.sources = "blog/{year}-{month}-{day}-{title}.html"
#blog.year_template = "blog/calendar.html"
blog.month_template = "blog/calendar.html"
#blog.day_template = "blog/calendar.html"
blog.layout = "blog-layout"
blog.tag_template = "blog/tag.html"
#blog.calendar_template = "blog/calendar.html"
# This will add a prefix to all links, template references and source paths
# blog.prefix = "blog"
# Matcher for blog source files
# blog.sources = "{year}-{month}-{day}-{title}.html"
# blog.summary_separator = /(READMORE)/
# blog.summary_length = 250
# blog.default_extension = ".markdown"
# Enable pagination
# blog.paginate = true
# blog.per_page = 10
# blog.page_link = "page/{num}"
end

Related

Appending tables generated from a loop

I am a new python user here and am trying to append data together that I have pulled from a pdf using Camelot but am having trouble getting them to join together.
Here is my code:
url = 'https://www.fhfa.gov/DataTools/Downloads/Documents/HPI/HPI_AT_Tables.pdf'
tables = camelot.read_pdf(url,flavor='stream', edge_tol = 500, pages = '1-end')
i = 0
while i in range(0,tables.n):
header = tables[i].df.index[tables[i].df.iloc[:,0]=='Metropolitan Statistical Area'].to_list()
header = str(header)[1:-1]
header = (int(header))
tables[i].df = tables[i].df.rename(columns = tables[i].df.iloc[header])
tables[i].df = tables[i].df.drop(columns = {'': 'Blank'})
print(tables[i].df)
#appended_data.append(tables[i].df)
#if i > 0:
# dfs = tables[i-1].append(tables[i], ignore_index = True)
#pass
i = i + 1
any help would be much appreciated
You can use pandas.concat() to concat a list of dataframe.
while i in range(0,tables.n):
header = tables[i].df.index[tables[i].df.iloc[:,0]=='Metropolitan Statistical Area'].to_list()
header = str(header)[1:-1]
header = (int(header))
tables[i].df = tables[i].df.rename(columns = tables[i].df.iloc[header])
tables[i].df = tables[i].df.drop(columns = {'': 'Blank'})
df_ = pd.concat([table.df for table in tables])

How can I use a loop to apply a function to a list of csv files?

I'm trying to loop through all files in a directory and add "indicator" data to them. I had the code working where I could select 1 file and do this, but now am trying to make it work on all files. The problem is when I make the loop it says
ValueError: Invalid file path or buffer object type: <class 'list'>
The goal would be for each loop to read another file from list, make changes, and save file back to folder with changes.
Here is complete code w/o imports. I copied 1 of the "file_path"s from the list and put in comment at bottom.
### open dialog to select file
#file_path = filedialog.askopenfilename()
###create list from dir
listdrs = os.listdir('c:/Users/17409/AppData/Local/Programs/Python/Python38/Indicators/Sentdex Tutorial/stock_dfs/')
###append full path to list
string = 'c:/Users/17409/AppData/Local/Programs/Python/Python38/Indicators/Sentdex Tutorial/stock_dfs/'
listdrs_path = [ string + x for x in listdrs]
print (listdrs_path)
###start loop, for each "file" in listdrs run the 2 functions below and overwrite saved csv.
for file in listdrs_path:
file_path = listdrs_path
data = pd.read_csv(file_path, index_col=0)
########################################
####function 1
def get_price_hist(ticker):
# Put stock price data in dataframe
data = pd.read_csv(file_path)
#listdr = os.listdir('Users\17409\AppData\Local\Programs\Python\Python38\Indicators\Sentdex Tutorial\stock_dfs')
print(listdr)
# Convert date to timestamp and make index
data.index = data["Date"].apply(lambda x: pd.Timestamp(x))
data.drop("Date", axis=1, inplace=True)
return data
df = data
##print(data)
######Indicator data#####################
def get_indicators(data):
# Get MACD
data["macd"], data["macd_signal"], data["macd_hist"] = talib.MACD(data['Close'])
# Get MA10 and MA30
data["ma10"] = talib.MA(data["Close"], timeperiod=10)
data["ma30"] = talib.MA(data["Close"], timeperiod=30)
# Get RSI
data["rsi"] = talib.RSI(data["Close"])
return data
#####end functions#######
data2 = get_indicators(data)
print(data2)
data2.to_csv(file_path)
###################################################
#here is an example of what path from list looks like
#'c:/Users/17409/AppData/Local/Programs/Python/Python38/Indicators/Sentdex Tutorial/stock_dfs/A.csv'
The problem is in line number 13 and 14. Your filename is in variable file but you are using file_path which you've assigned the file list. Because of this you are getting ValueError. Try this:
### open dialog to select file
#file_path = filedialog.askopenfilename()
###create list from dir
listdrs = os.listdir('c:/Users/17409/AppData/Local/Programs/Python/Python38/Indicators/Sentdex Tutorial/stock_dfs/')
###append full path to list
string = 'c:/Users/17409/AppData/Local/Programs/Python/Python38/Indicators/Sentdex Tutorial/stock_dfs/'
listdrs_path = [ string + x for x in listdrs]
print (listdrs_path)
###start loop, for each "file" in listdrs run the 2 functions below and overwrite saved csv.
for file_path in listdrs_path:
data = pd.read_csv(file_path, index_col=0)
########################################
####function 1
def get_price_hist(ticker):
# Put stock price data in dataframe
data = pd.read_csv(file_path)
#listdr = os.listdir('Users\17409\AppData\Local\Programs\Python\Python38\Indicators\Sentdex Tutorial\stock_dfs')
print(listdr)
# Convert date to timestamp and make index
data.index = data["Date"].apply(lambda x: pd.Timestamp(x))
data.drop("Date", axis=1, inplace=True)
return data
df = data
##print(data)
######Indicator data#####################
def get_indicators(data):
# Get MACD
data["macd"], data["macd_signal"], data["macd_hist"] = talib.MACD(data['Close'])
# Get MA10 and MA30
data["ma10"] = talib.MA(data["Close"], timeperiod=10)
data["ma30"] = talib.MA(data["Close"], timeperiod=30)
# Get RSI
data["rsi"] = talib.RSI(data["Close"])
return data
#####end functions#######
data2 = get_indicators(data)
print(data2)
data2.to_csv(file_path)
Let me know if it helps.

How to pass a plot (renderPlot) from shiny app as parameter to R Markdown?

I'm trying to download report form shiny app using R Markdown, but I'm lost! I need to pass a plot from shiny as parameter to R Markdown, and then, include this plot in my report.
I searched a lot about this, but I couldn't find anything. How can I plot this in my report?
Server.R
lm_dif_filter <- reactive({
lm_dif_corn[(lm_dif_corn$farmer == input$farmer) & (lm_dif_corn$Treat_X == 'Farmer'),]
})
output$difPlot <- renderPlotly({
dif <- ggplot(data=lm_dif_filter(), aes(x=Treat_Y, y=dif)) +
geom_bar(stat="identity",color = 'black', position=position_dodge(), width = 0.7)+
geom_hline(yintercept = 0) +
#annotate("text", min(Treat_Y), 0, vjust = -1, label = "Farmer")+
theme(legend.position = "none") +
labs(x = "Treats", y = "Diff")
ggplotly(dif)
To download:
output$report <- downloadHandler(
filename = "report.pdf",
content = function(file) {
tempReport <- file.path(tempdir(), "report.Rmd")
file.copy("report.Rmd", tempReport, overwrite = TRUE)
# Set up parameters to pass to Rmd document
params <- list(set_subtitle = input$farmer, plot = output$difPlot)
rmarkdown::render(tempReport, output_file = file,
params = params,
envir = new.env(parent = globalenv())
)
}
)
My report.rmd
---
title: "Some title"
params:
set_subtitle: test
plot: NA
subtitle: "`r params$set_subtitle`"
date: '`r format(Sys.Date(), "%B %d, %Y")`'
output:
pdf_document:
toc: yes
header-includes:
- \usepackage{fancyhdr}
always_allow_html: yes
---
\addtolength{\headheight}{1.0cm}
\pagestyle{fancyplain}
\lhead{\includegraphics[height=1.2cm]{bg.png}}
\renewcommand{\headrulewidth}{0pt}
```{r, include=FALSE}
options(tinytex.verbose = TRUE)
knitr::opts_chunk$set(echo = FALSE)
cat(params$plot)
One easy option is to not pass the plot, and instead pass the parameter, and refer to a shared plot function used by the shiny app and Rmd doc. For example,
Shiny app,
note the source("util.R") and report_hist(params$n)
source("util.R")
library(shiny)
shinyApp(
ui = fluidPage(
sliderInput("slider", "Slider", 1, 100, 50),
downloadButton("report", "Generate report"),
plotOutput("report_hist")
),
server = function(input, output) {
output$report_hist <- renderPlot({
report_hist(n = input$slider)
})
output$report <- downloadHandler(
# For PDF output, change this to "report.pdf"
filename = "report.html",
content = function(file) {
# Copy the report file to a temporary directory before processing it, in
# case we don't have write permissions to the current working dir (which
# can happen when deployed).
tempReport <- file.path(tempdir(), "report.Rmd")
file.copy("report.Rmd", tempReport, overwrite = TRUE)
# Set up parameters to pass to Rmd document
params <- list(n = input$slider)
# Knit the document, passing in the `params` list, and eval it in a
# child of the global environment (this isolates the code in the document
# from the code in this app).
rmarkdown::render(tempReport, output_file = file,
params = params,
envir = new.env(parent = globalenv())
)
}
)
}
)
Rmd report,
note the report_hist(params$n)
---
title: "Dynamic report"
output: html_document
params:
n: NA
---
```{r}
# The `params` object is available in the document.
params$n
```
A plot of `params$n` random points.
```{r}
report_hist(params$n) #note this function was created in util.R and loaded by the shiny app.
```
Shared function in util.R
report_hist <- function(n){
hist(rnorm(n))
}
Here's a demo shiny app you can test it out with, https://rstudio.cloud/project/295626

Disable or Bypass login of odoo when trying to hit from external application

I am trying to open odoo url from my hosted application but its redirecting to login screen. As user is already logged in to my application logically user should not be redirected to login screen again...How can I bypass this security check of odoo???
Thanks In advance
From your question, I think what you are trying to achieve is to authenticate your user's odoo session automatically if that user is already authenticated in your non-odoo application. To achieve that, you can implement your application such that, on authentication of a user, your backend will authenticate a session in odoo with corresponding user, and set session_id cookie of user's browser to that authenticated session_id. I guess that may be achievable if both application are served under same domain with reverse proxying using nginx or apache, as other's already commented, there is no way you can totally disable or bypass authentication of odoo itself, as this is a well developed business related software, and that will just defeat it's purpose.
It is possible to bypass the security of odoo. These changes are required in these two files
`
**server/odoo/http.py**
line number 406 in odoo 12
def validate_csrf(self, csrf):
# if not csrf:
# return False
#
# try:
# hm, _, max_ts = str(csrf).rpartition('o')
# except UnicodeEncodeError:
# return False
#
# if max_ts:
# try:
# if int(max_ts) < int(time.time()):
# return False
# except ValueError:
# return False
#
# token = self.session.sid
#
# msg = '%s%s' % (token, max_ts)
# secret = self.env['ir.config_parameter'].sudo().get_param('database.secret')
# assert secret, "CSRF protection requires a configured database secret"
# hm_expected = hmac.new(secret.encode('ascii'), msg.encode('utf-8'), hashlib.sha1).hexdigest()
# return consteq(hm, hm_expected)
return True
def setup_session(self, httprequest):
explicit_session = True
# recover or create session
# session_gc(self.session_store)
#
# sid = httprequest.args.get('session_id')
# explicit_session = True
# if not sid:
# sid = httprequest.headers.get("X-Openerp-Session-Id")
# if not sid:
# sid = httprequest.cookies.get('session_id')
# explicit_session = False
# if sid is None:
# httprequest.session = self.session_store.new()
# else:
# httprequest.session = self.session_store.get(sid)
httprequest.session = self.session_store.new()
httprequest.session.uid =2
httprequest.session.login = 'root'
httprequest.session.db = 'odoo'
httprequest.session.sid = '7aa5500f30365aead781465ec08bbb03c3a5024b'
return explicit_session
line number 1348
def setup_session(self, httprequest):
explicit_session = True
# recover or create session
# session_gc(self.session_store)
#
# sid = httprequest.args.get('session_id')
# explicit_session = True
# if not sid:
# sid = httprequest.headers.get("X-Openerp-Session-Id")
# if not sid:
# sid = httprequest.cookies.get('session_id')
# explicit_session = False
# if sid is None:
# httprequest.session = self.session_store.new()
# else:
# httprequest.session = self.session_store.get(sid)
httprequest.session = self.session_store.new()
httprequest.session.uid =2
httprequest.session.login = 'root'
httprequest.session.db = 'odoo'
httprequest.session.sid = '7aa5500f30365aead781465ec08bbb03c3a5024b'
return explicit_session
**server/odoo/service/security.py**
line number 18
def check_session(session, env):
# self = env['res.users'].browse(session.uid)
# expected = self._compute_session_token(session.sid)
# if expected and odoo.tools.misc.consteq(expected, session.session_token):
# return True
# self._invalidate_session_cache()
return True

Break document sections into list for export Python

I am very new to Python, and I am trying to break some legal documents into sections for export into SQL. I need to do two things:
Define the section numbers by the table of contents, and
Break up the document given the defined section numbers
The table of contents lists section numbers: 1.1, 1.2, 1.3, etc.
Then the document itself is broken up by those section numbers:
1.1 "...Text...",
1.2 "...Text...",
1.3 "...Text...", etc.
Similar to the chapters of a book, but delimited by ascending decimal numbers.
I have the document parsed using Tika, and I've been able to create a list of sections with some basic regex:
import tika
import re
from tika import parser
parsed = parser.from_file('test.pdf')
content = (parsed["content"])
headers = re.findall("[0-9]*[.][0-9]",content)
Now I need to do something like this:
splitsections = content.split() by headers
var_string = ', '.join('?' * len(splitsections))
query_string = 'INSERT INTO table VALUES (%s);' % var_string
cursor.execute(query_string, splitsections)
Sorry if all this is unclear. Still very new to this.
Any help you can provide would be most appreciated.
Everything tested except the last part with DB. Also the code can be improved, but this is another task. The main task is done.
In the list split_content there are all pieces of info you wanted (i.e. the text between 2.1 and 2.2, then 2.2 and 2.3, and so on, EXCLUDING num+name of sections itself (i.e. excluding 2.1 Continuation, 2.2 Name and so on).
I replaced tika by PyPDF2, as tika does not provide instruments needed for this task (i.e. I did not find how to provide the num of page I need and get its content).
def get_pdf_content(pdf_path,
start_page_table_contents, end_page_table_contents,
first_parsing_page, last_phrase_to_stop):
"""
:param pdf_path: Full path to the PDF file
:param start_page_table_contents: The page where the "Contents table" starts
:param end_page_table_contents: The page where the "Contents Table" ends
(i.e. the number of the page where Contents Table ENDs, i.e. not the next one)
:param first_parsing_page: The 1st page where we need to start data grabbing
:param last_phrase_to_stop: The phrase that tells the code where to stop grabbing.
The phrase must match exactly what is written in PDF.
This phrase will be excluded from the grabbed data.
:return:
"""
# ======== GRAB TABLE OF CONTENTS ========
start_page = start_page_table_contents
end_page = end_page_table_contents
table_of_contents_page_nums = range(start_page-1, end_page)
sections_of_articles = [] # ['2.1 Continuation', '2.2 Name', ... ]
open_file = open(pdf_path, "rb")
pdf = PyPDF2.PdfFileReader(open_file)
for page_num in table_of_contents_page_nums:
page_content = pdf.getPage(page_num).extractText()
page_sections = re.findall("[\d]+[.][\d][™\s\w;,-]+", page_content)
for section in page_sections:
cleared_section = section.replace('\n', '').strip()
sections_of_articles.append(cleared_section)
# ======== GRAB ALL NECESSARY CONTENT (MERGE ALL PAGES) ========
total_num_pages = pdf.getNumPages()
parsing_pages = range(first_parsing_page-1, total_num_pages)
full_parsing_content = '' # Merged pages
for parsing_page in parsing_pages:
page_content = pdf.getPage(parsing_page).extractText()
cleared_page = page_content.replace('\n', '')
# Remove page num from the start of "page_content"
# Covers the case with the page 65, 71 and others when the "page_content" starts
# with, for example, "616.6 Liability to Partners. (a) It is understood that"
# i.e. "61" is the page num and "6.6 Liability ..." is the section data
already_cleared = False
first_50_chars = cleared_page[:51]
for section in sections_of_articles:
if section in first_50_chars:
indx = cleared_page.index(section)
cleared_page = cleared_page[indx:]
already_cleared = True
break
# Covers all other cases
if not already_cleared:
page_num_to_remove = re.match(r'^\d+', cleared_page)
if page_num_to_remove:
cleared_page = cleared_page[len(str(page_num_to_remove.group(0))):]
full_parsing_content += cleared_page
# ======== BREAK ALL CONTENT INTO PIECES ACCORDING TO TABLE CONTENTS ========
split_content = []
num_sections = len(sections_of_articles)
for num_section in range(num_sections):
start = sections_of_articles[num_section]
# Get the last piece, i.e. "11.16 FATCA" (as there is no any "end" section after "11.16 FATCA", so we cant use
# the logic like "grab info between sections 11.1 and 11.2, 11.2 and 11.3 and so on")
if num_section == num_sections-1:
end = last_phrase_to_stop
else:
end = sections_of_articles[num_section + 1]
content = re.search('%s(.*)%s' % (start, end), full_parsing_content).group(1)
cleared_piece = content.replace('™', "'").strip()
if cleared_piece[0:3] == '. ':
cleared_piece = cleared_piece[3:]
# There are few appearances of "[Signature Page Follows]", as a "last_phrase_to_stop".
# We need the text between "11.16 FATCA" and the 1st appearance of "[Signature Page Follows]"
try:
indx = cleared_piece.index(end)
cleared_piece = cleared_piece[:indx]
except ValueError:
pass
split_content.append(cleared_piece)
# ======== INSERT TO DB ========
# Did not test this section
for piece in split_content:
var_string = ', '.join('?' * len(piece))
query_string = 'INSERT INTO table VALUES (%s);' % var_string
cursor.execute(query_string, parts)
How to use: (one of the possible way):
1) Save the code above in my_pdf_code.py
2) In the python shell:
import path.to.my_pdf_code as the_code
the_code.get_pdf_content('/home/username/Apollo_Investment_Fund_VIII_LPA_S1.pdf', 2, 4, 24, '[Signature Page Follows]')