Google Analytics Data API dictionary to pandas DataFrame

I exported Google Analytics data in the dictionary format below, with 3 dimensions and 3 metrics. How can I convert this format to a pandas DataFrame? I don't need the rowCount, minimums, maximums, or nextPageToken fields. Thank you.
{'reports': [{'columnHeader': {'dimensions': ['ga:date', 'ga:eventCategory', 'ga:eventAction'],
                               'metricHeader': {'metricHeaderEntries': [{'name': 'ga:totalEvents', 'type': 'INTEGER'},
                                                                        {'name': 'ga:UniqueEvents', 'type': 'INTEGER'},
                                                                        {'name': 'ga:eventvalue', 'type': 'INTEGER'}]}},
              'data': {'rows': [{'dimensions': ['20220820', 'accordion ', 'accordion'], 'metrics': [{'values': ['547', '528', '0']}]},
                                {'dimensions': ['20220817', 'accordion click', 'benefits'], 'metrics': [{'values': ['26', '26', '0']}]},
                                {'dimensions': ['20220818', 'accordion click', 'for-your-dog '], 'metrics': [{'values': ['1', '1', '0']}]},
                                {'dimensions': ['20220819', 'account', 'register'], 'metrics': [{'values': ['1465', '1345', '0']}]},
                                {'dimensions': ['20220820', 'account', 'reminders'], 'metrics': [{'values': ['59', '54', '0']}]},
                                # ... remaining rows truncated
                               ],
                       'rowCount': 17,
                       'minimums': [{'values': ['1', '1', '0']}],
                       'maximums': [{'values': ['40676', '37725', '5001337']}]},
              'nextPageToken': '1000'}]}
The final DataFrame should have one row per entry, with the three dimensions and three metrics as columns.
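One way to flatten it (a sketch, assuming the Reporting API v4 response shape shown above and that the dict is bound to a variable named response): zip the dimension and metric header names with each row's values; rowCount, minimums, maximums, and nextPageToken are simply never read.

import pandas as pd

def ga_report_to_df(response):
    # Flatten a GA Reporting API v4 response into one record per data row.
    report = response['reports'][0]
    dim_names = report['columnHeader']['dimensions']
    metric_names = [m['name']
                    for m in report['columnHeader']['metricHeader']['metricHeaderEntries']]
    records = []
    for row in report['data'].get('rows', []):
        record = dict(zip(dim_names, row['dimensions']))
        # row['metrics'] holds one entry per date range; there is only one here.
        record.update(zip(metric_names, row['metrics'][0]['values']))
        records.append(record)
    df = pd.DataFrame(records, columns=dim_names + metric_names)
    df[metric_names] = df[metric_names].astype(int)  # metric values arrive as strings
    return df

df = ga_report_to_df(response)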

Related

Slicing a PySpark DataFrame by converting to a pandas DataFrame; error when converting back to a PySpark DataFrame

I want to slice a PySpark DataFrame by selecting a specific column and several rows as below:
import pandas as pd
# Data filled in our DataFrame
rows = [['Lee Chong Wei', 69, 'Malaysia'],
['Lin Dan', 66, 'China'],
['Srikanth Kidambi', 9, 'India'],
['Kento Momota', 15, 'Japan']]
# Columns of our DataFrame
columns = ['Player', 'Titles', 'Country']
# DataFrame is created
df = spark.createDataFrame(rows, columns)
# Converting DataFrame to pandas
pandas_df = df.toPandas()
# First DataFrame formed by slicing
df1 = pandas_df.iloc[[2], :2]
# Second DataFrame formed by slicing
df2 = pandas_df.iloc[[2], 2:]
# Converting the slices to PySpark DataFrames
df1 = spark.createDataFrame(df1, schema = "Country")
df2 = spark.createDataFrame(df2, schema = "Country")
I am running a notebook on Databricks, so there is no need to create a SparkSession.
I get a ParseException when running the following lines:
df1 = spark.createDataFrame(df1, schema = "Country")
df2 = spark.createDataFrame(df2, schema = "Country")
Please let me know any ideas to solve this issue. The full error message is below:
---------------------------------------------------------------------------
ParseException Traceback (most recent call last)
<command-4065192899858765> in <module>
23
24 # Converting the slices to PySpark DataFrames
---> 25 df1 = spark.createDataFrame(df1, schema = "Country")
26 df2 = spark.createDataFrame(df2, schema = "Country")
/databricks/spark/python/pyspark/sql/session.py in createDataFrame(self, data, schema, samplingRatio, verifySchema)
706
707 if isinstance(schema, str):
--> 708 schema = _parse_datatype_string(schema)
709 elif isinstance(schema, (list, tuple)):
710 # Must re-encode any unicode strings to be consistent with StructField names
/databricks/spark/python/pyspark/sql/types.py in _parse_datatype_string(s)
841 return from_ddl_datatype("struct<%s>" % s.strip())
842 except:
--> 843 raise e
844
845
/databricks/spark/python/pyspark/sql/types.py in _parse_datatype_string(s)
831 try:
832 # DDL format, "fieldname datatype, fieldname datatype".
--> 833 return from_ddl_schema(s)
834 except Exception as e:
835 try:
/databricks/spark/python/pyspark/sql/types.py in from_ddl_schema(type_str)
823 def from_ddl_schema(type_str):
824 return _parse_datatype_json_string(
--> 825 sc._jvm.org.apache.spark.sql.types.StructType.fromDDL(type_str).json())
826
827 def from_ddl_datatype(type_str):
/databricks/spark/python/lib/py4j-0.10.9.1-src.zip/py4j/java_gateway.py in __call__(self, *args)
1302
1303 answer = self.gateway_client.send_command(command)
-> 1304 return_value = get_return_value(
1305 answer, self.gateway_client, self.target_id, self.name)
1306
/databricks/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
121 # Hide where the exception came from that shows a non-Pythonic
122 # JVM exception message.
--> 123 raise converted from None
124 else:
125 raise
ParseException:
mismatched input '<EOF>' expecting {'APPLY', 'CALLED', 'CHANGES', 'CLONE', 'COLLECT', ... 'VIEW', 'VIEWS', 'WHEN', 'WHERE', 'WINDOW', 'WITH', 'WITHIN', 'YEAR', 'ZONE', IDENTIFIER, BACKQUOTED_IDENTIFIER}(line 1, pos 7)
== SQL ==
Country
-------^^^
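A plausible fix (a sketch, not a confirmed answer): the schema string is parsed as a DDL schema, which needs "name type" pairs, so the bare column name "Country" fails at <EOF> because the type is missing. Note also that the iloc slices above leave Player and Titles in df1 and Country in df2:

# DDL schema strings need a type per column; "Country" alone cannot be parsed.
df1 = spark.createDataFrame(df1, schema="Player string, Titles long")
df2 = spark.createDataFrame(df2, schema="Country string")

Alternatively, omit the schema argument entirely and let Spark infer it from the pandas slices.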

Odoo automatic method call in model

I have this model:
class Data(models.Model):
    _name = 'aibot.data'
    _description = 'aibot.data'

    symbol = fields.Char(string='Symbol', required=False)
    ref = fields.Float(string='Amount', required=False)
    amount = fields.Float(string='Amount', required=False)
    user_id = fields.Many2one('res.users', string='User', required=True, ondelete='cascade',
                              default=lambda self: self.env.uid, help="User")
I have this Python list of dicts:
m = [
    {'symbol': '2', 'ref': 7.8, 'amount': 87},
    {'symbol': '2', 'ref': 7.8, 'amount': 25},
    {'symbol': '2', 'ref': 7.8, 'amount': 31},
    {'symbol': '2', 'ref': 7.8, 'amount': 26},
    {'symbol': '2', 'ref': 7.8, 'amount': 90},
    {'symbol': '2', 'ref': 7.8, 'amount': -18}
]
And this method:
def rep(self):
    parse = 1
    self.search([('create_uid', '=', 'user.id')]).unlink()
    m = [
        {'symbol': '2', 'ref': 7.8, 'amount': 87},
        {'symbol': '2', 'ref': 7.8, 'amount': 25},
        {'symbol': '2', 'ref': 7.8, 'amount': 31},
        {'symbol': '2', 'ref': 7.8, 'amount': 26},
        {'symbol': '2', 'ref': 7.8, 'amount': 90},
        {'symbol': '2', 'ref': 7.8, 'amount': -18}
    ]
    for i in m:
        print('hola', '')
        self.env['aibot.data'].create(i)
Everything works fine. But I need this method to run automatically whenever the Data model is accessed (from a tree view, a report, etc.), so the table is filled before anything else reads it.
If aibot.data is meant to hold the data lines of a report, you can do the following to fill them in automatically when the report record is created:
m = [
    {'symbol': '2', 'ref': 7.8, 'amount': 87},
    {'symbol': '2', 'ref': 7.8, 'amount': 25},
    {'symbol': '2', 'ref': 7.8, 'amount': 31},
    {'symbol': '2', 'ref': 7.8, 'amount': 26},
    {'symbol': '2', 'ref': 7.8, 'amount': 90},
    {'symbol': '2', 'ref': 7.8, 'amount': -18}
]

class Report(models.Model):
    _name = 'aibot.report'
    _description = 'this is the report object'

    # data lines for the report
    data_ids = fields.One2many('aibot.data', 'report_id')

    @api.model
    def default_get(self, fields_list):
        res = super().default_get(fields_list)
        data_ids = []
        for i in m:
            data_ids.append((0, 0, i))  # (0, 0, vals) creates a new linked record
        res['data_ids'] = data_ids
        return res

class Data(models.Model):
    _name = 'aibot.data'

    report_id = fields.Many2one('aibot.report')
    symbol = fields.Char(string='Symbol', required=False)
    ref = fields.Float(string='Amount', required=False)
    amount = fields.Float(string='Amount', required=False)
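With this in place, default_get runs whenever a new aibot.report record is initialized, so the lines from m are attached before anything else reads them. A minimal usage sketch (env is assumed to be an Odoo environment, e.g. in an odoo shell session):

# Creating a report without passing data_ids lets default_get fill them in.
report = env['aibot.report'].create({})
assert len(report.data_ids) == 6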

How to generate an invoice from a custom module in Odoo13?

I am developing a custom module.
I tried to add it through an object button with the following code, but it doesn't seem to work:
def create_invoice(self):
    rslt = self.env['account.invoice'].create({
        'partner_id': self.instructor.id,
        'name': 'customer invoice',
        'type': 'out_invoice',
        'date_invoice': 'create_date'
    })
    return rslt
How can I add a button that generates an invoice?
From Odoo 13 there is a change in the invoice object: it is now account.move instead of account.invoice. You can take this demo example as a reference:
invoice = self.env['account.move'].create({
    'type': 'out_invoice',
    'journal_id': journal.id,
    'partner_id': product_id.id,
    'invoice_date': date_invoice,
    'date': date_invoice,
    'invoice_line_ids': [(0, 0, {
        'product_id': product_id.id,
        'quantity': 40.0,
        'name': 'product test 1',
        'discount': 10.00,
        'price_unit': 2.27,
    })]
})
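Adapted to the question's button (a sketch under stated assumptions: self.instructor is a res.partner Many2one as in the question, and the line values are placeholders), the method could look like the following, wired to the form view with <button name="create_invoice" type="object" string="Create Invoice"/>:

def create_invoice(self):
    # Odoo 13: account.move replaces account.invoice, and
    # 'date_invoice' is renamed to 'invoice_date'.
    return self.env['account.move'].create({
        'type': 'out_invoice',
        'partner_id': self.instructor.id,
        'invoice_date': fields.Date.context_today(self),
        'invoice_line_ids': [(0, 0, {
            'name': 'customer invoice',
            'quantity': 1.0,
            'price_unit': 100.0,  # placeholder amount
        })],
    })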

Pandas - Extracting value based on common key

I have a DataFrame in the below format:
id, key1, key2
101, {'key': 'key_1001', 'fields': {'type': {'subtask': False}, 'summary': 'Title_1' , 'id': '71150'}}, NaN
101, NaN,{'key': 'key_1002', 'fields': {'type': {'subtask': False}, 'summary': 'Title_2' , 'id': '71151'}}
102, {'key': 'key_2001', 'fields': {'type': {'subtask': False}, 'summary': 'Title_11' , 'id': '71160'}}, NaN
102, NaN,{'key': 'key_2002', 'fields': {'type': {'subtask': False}, 'summary': 'Title_12' , 'id': '71161'}}
I am trying to achieve the below output from the above DataFrame.
id, key_value_1, key_value_2
101, key_1001, key_1002
102, key_2001, key_2002
Output of df.to_dict() (truncated):
{'id': {103: '101', 676: '101'}, 'key1' : {103: {'fields': {'type': {'subtask': False}, 'summary': 'Title_1' , 'id': '71150'},
676: nan}
You can use stack() to drop the NaN cells, .str.get('key') to pull the key out of each dict, and unstack() to pivot key1/key2 back into columns:
s = df.set_index('id').stack().str.get('key').unstack()
key1 key2
id
101 key_1001 key_1002
102 key_2001 key_2002
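A self-contained sketch of the same idea (assuming the cells hold real dicts; if they are string representations of dicts, parse them with ast.literal_eval first):

import numpy as np
import pandas as pd

df = pd.DataFrame({
    'id': ['101', '101', '102', '102'],
    'key1': [{'key': 'key_1001'}, np.nan, {'key': 'key_2001'}, np.nan],
    'key2': [np.nan, {'key': 'key_1002'}, np.nan, {'key': 'key_2002'}],
})

# stack() drops the NaNs; .str.get works element-wise on dicts as well as strings.
out = df.set_index('id').stack().str.get('key').unstack()
print(out)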

Pandas Groupby: return dict of rows

I would like to group my dataframe by one of the columns and then return a dictionary that has a list of all of the rows per column value. Is there a fast Pandas idiom for doing this?
Example:
test = pd.DataFrame({
    'id': ['alice', 'bob', 'bob', 'charlie'],
    'transaction_date': ['2020-01-01', '2020-01-01', '2020-01-02', '2020-01-02'],
    'amount': [50.0, 10.0, 12.0, 13.0]
})
Desired output:
result = {
    'alice': [Series(transaction_date='2020-01-01', amount=50.0)],
    'bob': [Series(transaction_date='2020-01-01', amount=10.0), Series(transaction_date='2020-01-02', amount=12.0)],
    'charlie': [Series(transaction_date='2020-01-02', amount=13.0)],
}
The following approaches do NOT work:
test.groupby('id').agg(list)
Returns a Dataframe where each column (amount and transaction_date) has a list of values, but that's not what I want. I want the result to be one list of rows / Pandas series per unique grouping column value ('id' value).
test.groupby('id').agg(list).to_dict():
{'amount': {'charlie': [13.0], 'bob': [10.0, 12.0], 'alice': [50.0]}, 'transaction_date': {'charlie': ['2020-01-02'], 'bob': ['2020-01-01', '2020-01-02'], 'alice': ['2020-01-01']}}
test.groupby('id').apply(list).to_dict():
{'charlie': ['amount', 'id', 'transaction_date'], 'bob': ['amount', 'id', 'transaction_date'], 'alice': ['amount', 'id', 'transaction_date']}
Use itertuples and zip:
import pandas as pd

test = pd.DataFrame({
    'id': ['alice', 'bob', 'bob', 'charlie'],
    'transaction_date': ['2020-01-01', '2020-01-01', '2020-01-02', '2020-01-02'],
    'amount': [50.0, 10.0, 12.0, 13.0]
})

columns = ['transaction_date', 'amount']
grouped = (test
           .groupby('id')[columns]
           .apply(lambda x: list(x.itertuples(name='Series', index=False))))
print(dict(zip(grouped.index, grouped.values)))
{
'alice': [Series(transaction_date='2020-01-01', amount=50.0)],
'bob': [
Series(transaction_date='2020-01-01', amount=10.0),
Series(transaction_date='2020-01-02', amount=12.0)
],
'charlie': [Series(transaction_date='2020-01-02', amount=13.0)]
}
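Note that this result holds namedtuples that are merely named 'Series'. If actual pandas Series objects are wanted, as the desired output in the question suggests, a plain dict comprehension over the groups is an alternative (a sketch):

# Each row becomes a real pandas Series (minus the grouping column).
result = {
    key: [row for _, row in group.drop(columns='id').iterrows()]
    for key, group in test.groupby('id')
}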