Using Json Input Variables In Airflow EMR Operator Steps - amazon-emr

I'm currently following the template given here: https://github.com/apache/airflow/blob/master/airflow/contrib/example_dags/example_emr_job_flow_manual_steps.py to create a DAG that calls for an EMR instance using spark submit. When setting up SPARK_TEST_STEPS, I need to include variables passed in from a POST JSON to fill in the spark submit, like below:
SPARK_TEST_STEPS = [
    {
        'Name': 'calculate_pi',
        'ActionOnFailure': 'CONTINUE',
        'HadoopJarStep': {
            'Jar': 'command-runner.jar',
            'Args': [
                '/usr/lib/spark/bin/run-example',
                'SparkPi',
                kwargs['dag_run'].conf['var_1'],
                kwargs['dag_run'].conf['var_2'],
                '10'
            ]
        }
    }
]
How can I pass in the variables given by the POST JSON while still following the format given in the GitHub link, so it looks like below?
from datetime import timedelta
import airflow
from airflow import DAG
from airflow.operators.python_operator import PythonOperator
from airflow.contrib.operators.emr_create_job_flow_operator import EmrCreateJobFlowOperator
from airflow.contrib.operators.emr_add_steps_operator import EmrAddStepsOperator
from airflow.contrib.sensors.emr_step_sensor import EmrStepSensor
from airflow.contrib.operators.emr_terminate_job_flow_operator import EmrTerminateJobFlowOperator

DEFAULT_ARGS = {
    'owner': 'Airflow',
    'depends_on_past': False,
    'start_date': airflow.utils.dates.days_ago(2),
    'email': ['airflow@example.com'],
    'email_on_failure': False,
    'email_on_retry': False
}

dag = DAG(
    'emr_job_flow_manual_steps_dag',
    default_args=DEFAULT_ARGS,
    dagrun_timeout=timedelta(hours=2),
    schedule_interval='0 3 * * *'
)

var_1 = ''
var_2 = ''
SPARK_TEST_STEPS = []

def define_param(**kwargs):
    global var_1
    global var_2
    global SPARK_TEST_STEPS
    var_1 = str(kwargs['dag_run'].conf['var_1'])
    var_2 = str(kwargs['dag_run'].conf['var_2'])
    SPARK_TEST_STEPS = [
        {
            'Name': 'calculate_pi',
            'ActionOnFailure': 'CONTINUE',
            'HadoopJarStep': {
                'Jar': 'command-runner.jar',
                'Args': [
                    '/usr/lib/spark/bin/run-example',
                    'SparkPi',
                    kwargs['dag_run'].conf['var_1'],
                    kwargs['dag_run'].conf['var_2'],
                    '10'
                ]
            }
        }
    ]
    return SPARK_TEST_STEPS

DEFINE_PARAMETERS = PythonOperator(
    task_id='DEFINE_PARAMETERS',
    python_callable=define_param,
    provide_context=True,
    dag=dag)

cluster_creator = EmrCreateJobFlowOperator(
    task_id='create_job_flow',
    job_flow_overrides=JOB_FLOW_OVERRIDES,
    aws_conn_id='aws_default',
    emr_conn_id='emr_default',
    dag=dag
)

step_adder = EmrAddStepsOperator(
    task_id='add_steps',
    job_flow_id="{{ task_instance.xcom_pull('create_job_flow', key='return_value') }}",
    aws_conn_id='aws_default',
    steps='{{ ti.xcom_pull(task_ids="DEFINE_PARAMETERS") }}',
    dag=dag
)

step_checker = EmrStepSensor(
    task_id='watch_step',
    job_flow_id="{{ task_instance.xcom_pull('create_job_flow', key='return_value') }}",
    step_id="{{ task_instance.xcom_pull('add_steps', key='return_value')[0] }}",
    aws_conn_id='aws_default',
    dag=dag
)

cluster_remover = EmrTerminateJobFlowOperator(
    task_id='remove_cluster',
    job_flow_id="{{ task_instance.xcom_pull('create_job_flow', key='return_value') }}",
    aws_conn_id='aws_default',
    dag=dag
)

cluster_creator.set_downstream(step_adder)
step_adder.set_downstream(step_checker)
step_checker.set_downstream(cluster_remover)
I cannot use Variable.get and Variable.set, as this would not allow multiple DAG runs with different variable values at the same time due to the constant overwriting of Airflow global variables. I have tried pulling SPARK_TEST_STEPS via XCom, but the value rendered from XCom is a string, while the EmrAddStepsOperator steps parameter requires a list.

I solved a similar problem by creating a custom operator that parses the JSON prior to executing. The cause of the problem is that when you pass steps='{{ ti.xcom_pull(task_ids="DEFINE_PARAMETERS") }}', you are literally passing a string with the value interpolated by the templating engine; it is not deserialized.
from airflow.contrib.hooks.emr_hook import EmrHook
from airflow.exceptions import AirflowException
from airflow.models import BaseOperator
from airflow.utils.decorators import apply_defaults
from airflow.contrib.operators.emr_add_steps_operator import EmrAddStepsOperator
import json


class DynamicEmrStepsOperator(EmrAddStepsOperator):
    template_fields = ['job_flow_id', 'steps']
    template_ext = ()
    ui_color = '#f9c915'

    @apply_defaults
    def __init__(
            self,
            job_flow_id=None,
            steps="[]",
            *args, **kwargs):
        super().__init__(
            job_flow_id=job_flow_id,
            steps=steps,
            *args, **kwargs)

    def execute(self, context):
        # Deserialize the rendered template string back into a list before
        # handing it to the stock EmrAddStepsOperator logic.
        self.steps = json.loads(self.steps)
        return super().execute(context)
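With that operator in place, the templated XCom string is deserialized inside execute() before it reaches EMR. A minimal sketch of how the add_steps task from the question could be rewired; the import path dynamic_emr_steps is a placeholder for wherever you save the class, and the json.dumps note is an assumption about how the XCom must be pushed for json.loads to succeed:

# Hypothetical wiring: DynamicEmrStepsOperator is assumed to live in a local
# module called dynamic_emr_steps on the DAGs path.
from dynamic_emr_steps import DynamicEmrStepsOperator

step_adder = DynamicEmrStepsOperator(
    task_id='add_steps',
    job_flow_id="{{ task_instance.xcom_pull('create_job_flow', key='return_value') }}",
    aws_conn_id='aws_default',
    # Jinja renders the XCom value to a string; execute() then json.loads() it back
    # into a list. For that to parse, have define_param push valid JSON, e.g.
    # `return json.dumps(SPARK_TEST_STEPS)` instead of returning the list itself.
    steps='{{ ti.xcom_pull(task_ids="DEFINE_PARAMETERS") }}',
    dag=dag,
)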

Related

Slack automate calendar events

I'm looking for a way to automate the creation of calendar events. I'm part of multiple spaces in my school, and they keep posting events that happen on a regular basis.
I was wondering if there's a way to automate these calendar events. I want to write a script with the Slack APIs that reads the messages from all the spaces I'm part of, scans them for any event-related information, and creates a new event in my Google Calendar. I want to run this at the end of the day on all the messages from all the spaces.
from __future__ import print_function
import os
import json
import pprint
import time
import parsedatetime
from datetime import datetime
from datetime import timedelta
from slack_sdk import WebClient
from slack_sdk.errors import SlackApiError
from google.oauth2.credentials import Credentials
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError


def get_google_service():
    creds = None
    SCOPES = ['https://www.googleapis.com/auth/calendar']
    if os.path.exists('token.json'):
        creds = Credentials.from_authorized_user_file('token.json', SCOPES)
    return build('calendar', 'v3', credentials=creds)


def send_google_calendar_invite(service, channel_name, start_time, end_time):
    try:
        # f = open("template.json", "r")
        # template_data = f.read()
        template_data = '''
        {
            "summary": "event_name",
            "location": "event_location",
            "description": "event_description",
            "start": {
                "dateTime": "event_start_time",
                "timeZone": "America/Los_Angeles"
            },
            "end": {
                "dateTime": "event_end_time",
                "timeZone": "America/Los_Angeles"
            }
        }
        '''
        template_data = template_data.replace('event_name', channel_name)
        template_data = template_data.replace('event_location', channel_name + '-meeting')
        template_data = template_data.replace('event_description', channel_name + '-description')
        template_data = template_data.replace('event_start_time', start_time)
        template_data = template_data.replace('event_end_time', end_time)
        json_object = json.loads(template_data)
        json_formatted_str = json.dumps(json_object, indent=2)
        print(json_formatted_str)
        event = service.events().insert(calendarId='primary', body=json_object).execute()
        print('Event created: %s' % (event.get('htmlLink')))
    except HttpError as error:
        print('An error occurred: %s' % error)


def read_slack_messages():
    channel_id = "C04QL76V21X"
    try:
        lastHourDateTime = datetime.now() - timedelta(hours=24)
        client = WebClient(token=open("secrets.txt", "r").read())
        conversation_history = client.conversations_history(channel=channel_id, oldest=time.mktime(lastHourDateTime.timetuple()))
        channel_info_result = client.conversations_info(channel=channel_id)
        channel_name = channel_info_result['channel']['name']
        conversation_messages = conversation_history["messages"]
        print("{} messages found in {}".format(len(conversation_messages), channel_id))
        # import pdb; pdb.set_trace();
        service = get_google_service()
        for message in conversation_messages[:2]:
            chat_message = message['text']
            try:
                cal = parsedatetime.Calendar()
                dates = cal.parse(chat_message)
                print(dates)
                start_time = time.strftime('%Y-%m-%dT%H:%M:%S-%000:00', (dates[0]))
                end_time = start_time[:11] + f"{int(start_time[11:13]) + 1:02}" + start_time[13:]
                print(chat_message, ' : ', start_time, ' ||| ', end_time)
                send_google_calendar_invite(service, channel_name, start_time, end_time)
            except TypeError as e:
                print(' : Nope : ', e)
    except SlackApiError as e:
        print("Error getting conversation: {}".format(e))


if __name__ == '__main__':
    read_slack_messages()
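The script above only polls one hard-coded channel. To cover every space the asker is in, one option is to list the conversations the token is a member of and loop over them. A rough sketch using the same slack_sdk client, assuming a token with the channels:read scope; error handling is omitted:

from slack_sdk import WebClient

def list_member_channels(token):
    """Return ids of all public/private channels the token's user or bot is a member of."""
    client = WebClient(token=token)
    channel_ids = []
    cursor = None
    while True:
        # conversations_list is paginated; keep following the cursor until it is empty.
        resp = client.conversations_list(
            types="public_channel,private_channel",
            limit=200,
            cursor=cursor,
        )
        for channel in resp["channels"]:
            if channel.get("is_member"):
                channel_ids.append(channel["id"])
        cursor = resp.get("response_metadata", {}).get("next_cursor")
        if not cursor:
            break
    return channel_ids

Each returned id could then be fed into read_slack_messages in place of the hard-coded channel_id.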

Pool apply function hangs and never executes

I am trying to fetch Rally data using its Python library pyral. Sequentially, the same code works, but it's slow.
I thought of using the Python multiprocessing package, however my pool.apply call gets stuck and never executes. I tried running it in the PyCharm IDE as well as the Windows cmd prompt.
import pandas as pd
from pyral import Rally
from multiprocessing import Pool, Manager
from pyral.entity import Project


def process_row(sheetHeaders: list, item: Project, L: list):
    print('processing row : ' + item.Name)  ## this print never gets called
    row = ()
    for header in sheetHeaders:
        row.append(process_cell(header, item))
    L.append(row)


def process_cell(attr, item: Project):
    param = getattr(item, attr)
    if param is None:
        return None
    try:
        if attr == 'Owner':
            return param.__getattr__('Name')
        elif attr == 'Parent':
            return param.__getattr__('ObjectID')
        else:
            return param
    except KeyError as e:
        print(e)


# Projects
# PortfolioItem
# User Story
# Hierarchical Req
# tasks
# defects
# -------------MAIN-----------------
def main():
    # Rally connection
    rally = Rally('rally1.rallydev.com', apikey='<my_key>')
    file = 'rally_data.xlsx'
    headers = {
        'Project': ['Name', 'Description', 'CreationDate', 'ObjectID', 'Parent', 'Owner', 'State'],
    }
    sheetName = 'Project'
    sheetHeaders = headers.get(sheetName)

    p = Pool(1)
    result = rally.get(sheetName, fetch=True, pagesize=10)

    with Manager() as manager:
        L = manager.list()
        for item in result:
            print('adding row for : ' + item.Name)
            p.apply_async(func=process_row, args=(sheetHeaders, item, L))  ## gets stuck here
        p.close()
        p.join()
        pd.DataFrame(L).to_excel(file, sheet_name=sheetName)


if __name__ == '__main__':
    main()
I also tried it without the Manager list, without any difference in the outcome:
def main():
    # Rally connection
    rally = Rally('rally1.rallydev.com', apikey='<key>')
    file = 'rally_data.xlsx'
    headers = {
        'Project': ['Name', 'Description', 'CreationDate', 'ObjectID', 'Parent', 'Owner', 'State'],
    }
    sheetName = 'Project'
    sheetHeaders = headers.get(sheetName)

    result = rally.get(sheetName, fetch=True, pagesize=10)

    async_results = []
    with Pool(50) as p:
        for item in result:
            print('adding row for : ' + item.Name)
            async_results.append(p.apply_async(func=process_row, args=(sheetHeaders, item)))
        res = [r.get() for r in async_results]

    pd.DataFrame(res).to_excel(file, sheet_name=sheetName)
I don't know why, but replacing multiprocessing with multiprocessing.dummy in the import statement worked.
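For reference, multiprocessing.dummy exposes the same Pool API but backs it with threads instead of processes, so arguments such as pyral Project objects never have to be pickled across a process boundary (a plausible reason the process-based pool hung). A minimal sketch of the swap, under the assumption that the rest of the code stays the same:

# multiprocessing.dummy mirrors the multiprocessing API but uses threads.
# Thread workers share memory with the parent, so the objects passed to them
# do not need to be picklable.
from multiprocessing.dummy import Pool  # instead of: from multiprocessing import Pool

def square(x):
    return x * x

if __name__ == '__main__':
    with Pool(4) as p:
        results = p.map(square, range(10))
    print(results)  # [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]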

airflow BigQueryOperator ERROR - 400 Syntax error: Unexpected token at [1:244] - while using params

I have 2 BigQueryOperator tasks in a loop. The first task works perfectly; however, the second task (create_partition_table_agent_intensity_{v_list[i]}) throws an error:
ERROR - 400 Syntax error: Unexpected "{" at [1:244]
I can't understand what the difference is between the tasks.
Maybe someone can point me in the right direction?
Here is my entire code:
from airflow.models import (DAG, Variable)
import os
from airflow.operators.dummy import DummyOperator
from airflow.operators.bash_operator import BashOperator
from airflow.operators.python_operator import PythonOperator
import datetime
import json
import pandas as pd
from airflow.contrib.operators.gcs_to_bq import GoogleCloudStorageToBigQueryOperator
from airflow.contrib.operators.bigquery_operator import BigQueryOperator
from google.cloud import bigquery
from airflow.contrib.hooks.bigquery_hook import BigQueryHook
from airflow.providers.google.cloud.operators.bigquery import BigQueryDeleteTableOperator

default_args = {
    'start_date': datetime.datetime(2020, 1, 1),
}

PROJECT_ID = os.environ.get("GCP_PROJECT_ID", "bigquery_default")
PROJECT_ID_GCP = os.environ.get("GCP_PROJECT_ID", "my_project")
DATASET_MRR = os.environ.get("GCP_BIGQUERY_DATASET_NAME", "LP_RAW")
DATASET_STG = os.environ.get("GCP_BIGQUERY_DATASET_NAME", "LP_STG")
MRR_AGENT_ACTIVITY = "RPT_FA_AGENT_ACTIVITY_VW"
MRR_AGENT_INTENSITY = "RPT_AGG_15M_MSG_AGENT_INTENSITY_VW"
STG_AGENT_ACTIVITY_PARTITIONED = "agent_acitivity_partitioned"
STG_AGENT_INTENSITY_PARTITIONED = "agent_intensity_partitioned"


def list_dates_in_df(ti):
    hook = BigQueryHook(bigquery_conn_id=PROJECT_ID,
                        use_legacy_sql=False)
    bq_client = bigquery.Client(project=hook._get_field("project"),
                                credentials=hook._get_credentials())
    query = "select distinct(cast(PARTITION_KEY as string)) as PARTITION_KEY \
        FROM LP_MNG.PartitionStatusMonitoring \
        where SOURCE_TABLE in ('RPT_FA_AGENT_ACTIVITY_VW','RPT_AGG_15M_MSG_AGENT_INTENSITY_VW') \
        and IS_LOAD_COMPLETED = false;"
    df = bq_client.query(query).to_dataframe()
    res = df.values.tolist()
    # unpack the list of lists: l is a list inside the res list, take each item from each l
    my_list = [item for l in res for item in l]
    ti.xcom_push(key='list_of_dates', value=my_list)


def update_variable(ti):
    updated_file_list = ti.xcom_pull(key='list_of_dates', task_ids='list_dates')
    Variable.set(key="updated_dates", value=json.dumps(updated_file_list))
    print(updated_file_list)
    print(type(updated_file_list))
with DAG(
    'test_with_mng_table_list',
    schedule_interval=None,
    catchup=False,
    default_args=default_args
) as dag:
    list_dates = PythonOperator(
        task_id='list_dates',
        python_callable=list_dates_in_df
    )
    set_list = PythonOperator(
        task_id='set_list',
        python_callable=update_variable
    )

    v_list = Variable.get("updated_dates", deserialize_json=True)

    end_job = BashOperator(
        task_id='end_job',
        bash_command='echo end_job.',
        trigger_rule='all_done',
    )
    for i in range(len(v_list)):
        create_partition_table_agent_activity = BigQueryOperator(
            task_id=f"create_partition_table_agent_activity_{v_list[i]}",
            sql="select ACCOUNT_ID,timestamp_trunc(CHANGE_EVENT_TIME_15M,HOUR) as ANALYSIS_DATE,\
                AGENT_ID,AGENT_GROUP_ID,USER_TYPE_ID,\
                sum(AWAY_ENGAGED_TIME) AWAY_ENGAGED_TIME,sum(BACKIN5_ENGAGED_TIME) BACKIN5_ENGAGED_TIME,\
                sum(DURATION_DAYS) DURATION_DAYS,sum(ONLINE_TIME) ONLINE_TIME,\
                sum(BACK_IN_5_TIME) BACK_IN_5_TIME,sum(AWAY_TIME) AWAY_TIME\
                from {{ params.PROJECT_ID }}.{{ params.DATASET_MRR }}.{{ params.MRR1 }}\
                where cast(CHANGE_EVENT_TIME_15M as STRING FORMAT 'YYYY-MM-DD') = cast('{{ params.date_a }}' as STRING) \
                group by 1,2,3,4,5;",
            params={
                "PROJECT_ID": PROJECT_ID_GCP,
                "DATASET_MRR": DATASET_MRR,
                "MRR1": MRR_AGENT_ACTIVITY,
                "date_a": v_list[i]
            },
            destination_dataset_table=f"{PROJECT_ID_GCP}.{DATASET_STG}.{STG_AGENT_ACTIVITY_PARTITIONED}{v_list[i]}",
            create_disposition='CREATE_IF_NEEDED',
            write_disposition='WRITE_TRUNCATE',
            # bigquery_conn_id=CONNECTION_ID,
            use_legacy_sql=False,
            dag=dag
        )
        create_partition_table_agent_intensity = BigQueryOperator(
            task_id=f"create_partition_table_agent_intensity_{v_list[i]}",
            sql=f"select ACCOUNT_ID,timestamp_trunc(AGG_DATE,HOUR) as ANALYSIS_DATE,\
                AGENT_ID, GROUP_ID as AGENT_GROUP_ID,\
                USER_TYPE_ID, SUM(SUM_CONVERSATION_LOAD_RATE) as SUM_CONVERSATION_LOAD_RATE,\
                SUM(NO_EVENTS) AS NO_EVENTS\
                from {{ params.PROJECT_ID }}.{{ params.DATASET_MRR }}.{{ params.MRR2 }}\
                where cast(AGG_DATE as STRING FORMAT 'YYYY-MM-DD') = cast('{{ params.date_a }}' as STRING) \
                group by 1,2,3,4,5;",
            params={
                "PROJECT_ID": PROJECT_ID_GCP,
                "DATASET_MRR": DATASET_MRR,
                "MRR2": MRR_AGENT_INTENSITY,
                "date_a": v_list[i]
            },
            destination_dataset_table=f"{PROJECT_ID_GCP}.{DATASET_STG}.{STG_AGENT_INTENSITY_PARTITIONED}{v_list[i]}",
            create_disposition='CREATE_IF_NEEDED',
            write_disposition='WRITE_TRUNCATE',
            # bigquery_conn_id=CONNECTION_ID,
            use_legacy_sql=False,
            dag=dag
        )

        d2 = DummyOperator(task_id='generate_data_{0}'.format(v_list[i]), dag=dag)

        list_dates >> set_list >> [
            create_partition_table_agent_activity, create_partition_table_agent_intensity
        ] >> d2 >> end_job
I do not have a playground to test it, but I think you should not use an f-string for the sql parameter. If you use {{ something }} in an f-string, it returns the string { something }, so the Jinja parameters for the query are never inserted, and this results in a SQL syntax error because the query runs without them. Please try removing the f before the sql string in the second task.
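To see why the f-string breaks the query: Python collapses the doubled braces before Airflow's Jinja templating ever runs, so BigQuery receives a literal { params... } token. A small illustration in plain Python (no Airflow needed):

# In an f-string, '{{' and '}}' are escapes for literal braces, so the Jinja
# placeholder is destroyed before Airflow can render it.
templated = "select * from {{ params.PROJECT_ID }}.{{ params.DATASET_MRR }}.{{ params.MRR2 }}"
broken = f"select * from {{ params.PROJECT_ID }}.{{ params.DATASET_MRR }}.{{ params.MRR2 }}"

print(templated)  # ... {{ params.PROJECT_ID }} ... -> Jinja can still substitute params
print(broken)     # ... { params.PROJECT_ID } ...   -> literal braces reach BigQuery, syntax error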

How to troubleshoot 'timeout' error on Airflow

I have a new DAG that performs a 3-task operation. The DAG runs fine, but every now and then I get a 'timeout' error message at the top in red, and I have no idea why this is.
Does anybody know what the cause could be?
Here is my code (with a few parameters changed for discretion reasons):
from airflow import DAG
from airflow.operators.mysql_operator import MySqlOperator
from datetime import datetime
from airflow.operators.sensors import NamedHivePartitionSensor
from airflow.hooks.presto_hook import PrestoHook
import sys
import os
import logging

sys.path.append(os.environ['SSSSSS'] + '/WWW/WWWWW')
from utils import sql_to_string, parse_exec_to_time, parse_exec_to_date, NewPrestoOperator
from config import emails
from NotifyOperator import NotifyOperator

########################################################################
# Parameters to be set
default_args = {
    'owner': 'etl',
    'start_date': datetime(2019, 4, 15, 0, 0),
    'depends_on_past': True,
    'wait_for_downstream': True,
    'email': data_team_emails,
    'email_on_failure': True,
    'email_on_retry': False
}

dag = DAG(dag_id='g13-new_lead_form_alert',
          default_args=default_args,
          max_active_runs=1,
          schedule_interval='0 * * * *')


def _get_records_pandas(query):
    start_time = datetime.now()
    logging.log(logging.INFO, "Extract Query={}".format(query))
    records = PrestoHook(presto_conn_id='{0}-new'.format(os.environ['YYYYY'])).get_pandas_df(query)
    logging.log(logging.INFO, "Extract completed. it took:{}".format(str(datetime.now() - start_time)))
    return records


SELECT_ALL_QUERY = 'select title, pageloadid from mysql.{0}.agg_pageloadid_lead_form'.format(os.environ['DDDDDD'])

t0 = NamedHivePartitionSensor(task_id='g13-00-wait_for_partition',
                              partition_names=['{2}.table/dt={0}/tm={1}/'.format(
                                  '{{ (execution_date + macros.timedelta(minutes=60)).strftime(\'%Y-%m-%d\')}}',
                                  '{{ (execution_date + macros.timedelta(minutes=60)).strftime(\'%H\')}}',
                                  os.environ['XXXXX'])],
                              metastore_conn_id='RRRRRR',
                              dag=dag,
                              soft_fail=True,
                              pool='sensor_tasks',
                              retries=5
                              )

t1 = MySqlOperator(
    task_id='g13-01-truncate',
    sql='''
        truncate table {0}.agg_pageloaduid_lead_form
    '''.format(os.environ['LLLLL']),
    mysql_conn_id='AAAA',
    dag=dag)

t2 = NewPrestoOperator(
    task_id="g13-02-insert_new_lead",
    sql=sql_to_string("/g13_insert_new_lead.sql").format(
        os.environ['YYYYY'],
        '{{execution_date.strftime(\'%Y-%m-%d\')}}',
        '{{execution_date.strftime(\'%H\')}}',
        os.environ['ETL_ENVIRONMENT']),
    presto_conn_id='{0}-new'.format(os.environ['XXXXX']),
    provide_context=True,
    fail_on_zero_rows=False,
    retries=5,
    retry_delay=60,
    pool='presto_tasks',
    dag=dag
)

t3 = NotifyOperator(
    task_id='g13-03-notification',
    channels=['test'],
    email_recipients=[],
    email_subject='New Lead Alert',
    email_template="""abc""",
    op_kwargs={
        'title': 'New Lead Form',
        'response': _get_records_pandas(SELECT_ALL_QUERY)
    },
    dag=dag
)

t0 >> t1 >> t2 >> t3
Any idea what could be causing this?

Django: object needs to have a value for field "..." before this many-to-many relationship can be used

I experience a strange error with Django 1.5:
I have defined a model like below:
class Company(models.Model):
    user = models.OneToOneField(User)
    agreed_to_terms = models.NullBooleanField(default=False)
    address = models.CharField(_('Complete Address'),
                               max_length=255, null=True, blank=True)
    winning_bid = models.ForeignKey('Bid',
                                    related_name='winning_bid',
                                    blank=True, null=True)
    bid_list = models.ManyToManyField('Bid',
                                      related_name='bids',
                                      blank=True, null=True)
    ...


class Bid(models.Model):
    user = models.ForeignKey(User, null=True, blank=True)
    description = models.TextField(_('Description'),
                                   blank=True, null=True,)
    volume = models.DecimalField(max_digits=7, decimal_places=3,
                                 null=True, blank=True,)
    ...
    # all other attributes are of the Boolean, CharField or DecimalField type. No ForeignKeys, nor ManyToManyFields.
When I try to submit the form with the initial data through the Django admin, I get the following error:
Exception Value:
"<Company: Company object>" needs to have a value for field "company" before this many-to-many relationship can be used.
Please see the traceback below.
The error message does not make much sense to me. The only m2m relationship is bid_list, which has null=True and was null at the time of saving.
Is there something new in Django 1.5 that I have not discovered while reading the changelog (this is my first project in Django 1.5)?
Interestingly, when I save an object in the Django shell, I do not get an error message, but the object does not get saved either.
In [1]: user = User.objects.get(username='admin')
In [2]: new_company = Company()
In [3]: new_company.user = user
In [4]: new_company.save() Out[4]: <Company: Company object>
In [5]: foo = Company.objects.all()
Out[5]: []
When I try to trace the SQL statements with the debug toolbar, I can only see SQL SELECT statements, no INSERT requests.
What is the explanation for this strange behaviour?
Traceback:
Request Method: POST
Request URL: /admin/company/company/add/
Django Version: 1.5.1
Python Version: 2.7.1
Installed Applications:
('django.contrib.admin',
'django.contrib.admindocs',
'django.contrib.auth',
'django.contrib.contenttypes',
'django.contrib.gis',
'django.contrib.humanize',
'django.contrib.sessions',
'django.contrib.sites',
'django.contrib.messages',
'django.contrib.staticfiles',
'django.contrib.admin',
'django.contrib.admindocs',
'crispy_forms',
'django_extensions',
'easy_thumbnails',
'registration',
'south',
'company',
'bid',
'debug_toolbar')
Installed Middleware:
('django.middleware.common.CommonMiddleware',
'django.contrib.sessions.middleware.SessionMiddleware',
'django.middleware.csrf.CsrfViewMiddleware',
'django.contrib.auth.middleware.AuthenticationMiddleware',
'django.contrib.messages.middleware.MessageMiddleware',
'debug_toolbar.middleware.DebugToolbarMiddleware')
Traceback:
File "/Users/neurix/Development/virtual/lib/python2.7/site-packages/django/core/handlers/base.py" in get_response
115. response = callback(request, *callback_args, **callback_kwargs)
File "/Users/neurix/Development/virtual/lib/python2.7/site-packages/django/contrib/admin/options.py" in wrapper
372. return self.admin_site.admin_view(view)(*args, **kwargs)
File "/Users/neurix/Development/virtual/lib/python2.7/site-packages/django/utils/decorators.py" in _wrapped_view
91. response = view_func(request, *args, **kwargs)
File "/Users/neurix/Development/virtual/lib/python2.7/site-packages/django/views/decorators/cache.py" in _wrapped_view_func
89. response = view_func(request, *args, **kwargs)
File "/Users/neurix/Development/virtual/lib/python2.7/site-packages/django/contrib/admin/sites.py" in inner
202. return view(request, *args, **kwargs)
File "/Users/neurix/Development/virtual/lib/python2.7/site-packages/django/utils/decorators.py" in _wrapper
25. return bound_func(*args, **kwargs)
File "/Users/neurix/Development/virtual/lib/python2.7/site-packages/django/utils/decorators.py" in _wrapped_view
91. response = view_func(request, *args, **kwargs)
File "/Users/neurix/Development/virtual/lib/python2.7/site-packages/django/utils/decorators.py" in bound_func
21. return func(self, *args2, **kwargs2)
File "/Users/neurix/Development/virtual/lib/python2.7/site-packages/django/db/transaction.py" in inner
223. return func(*args, **kwargs)
File "/Users/neurix/Development/virtual/lib/python2.7/site-packages/django/contrib/admin/options.py" in add_view
1008. self.save_related(request, form, formsets, False)
File "/Users/neurix/Development/virtual/lib/python2.7/site-packages/django/contrib/admin/options.py" in save_related
762. form.save_m2m()
File "/Users/neurix/Development/virtual/lib/python2.7/site-packages/django/forms/models.py" in save_m2m
84. f.save_form_data(instance, cleaned_data[f.name])
File "/Users/neurix/Development/virtual/lib/python2.7/site-packages/django/db/models/fields/related.py" in save_form_data
1336. setattr(instance, self.attname, data)
File "/Users/neurix/Development/virtual/lib/python2.7/site-packages/django/db/models/fields/related.py" in __set__
910. manager = self.__get__(instance)
File "/Users/neurix/Development/virtual/lib/python2.7/site-packages/django/db/models/fields/related.py" in __get__
897. through=self.field.rel.through,
File "/Users/neurix/Development/virtual/lib/python2.7/site-packages/django/db/models/fields/related.py" in __init__
586. (instance, source_field_name))
Exception Type: ValueError at /admin/company/company/add/
Exception Value: "<Company: Company object>" needs to have a value for field "company" before this many-to-many relationship can be used.
settings.py
import os, os.path, sys
DEBUG = True
TEMPLATE_DEBUG = DEBUG
# Setting up folders
abspath = lambda *p: os.path.abspath(os.path.join(*p))
PROJECT_ROOT = os.path.abspath(os.path.dirname(__file__))
TASK2_MODULE_PATH = abspath(PROJECT_ROOT, 'apps/')
sys.path.insert(0, TASK2_MODULE_PATH)
# Loading passwords
try:
    from settings_pwd import *
except ImportError:
    pass
AUTH_PROFILE_MODULE = 'profile.UserProfile'
#ALLOWED_HOSTS = [''] # not needed for DEBUG = True
TIME_ZONE = 'Europe/London'
LANGUAGE_CODE = 'en-uk'
LANGUAGES = [
("en", u"English"),
]
SITE_ID = 1
USE_I18N = True
USE_L10N = True
USE_TZ = True
if DEBUG:
    MEDIA_ROOT = os.path.join(PROJECT_ROOT, "site_media", "media")
else:
    MEDIA_ROOT = "folder_to_upload_files"

if DEBUG:
    MEDIA_URL = "/media/"
else:
    MEDIA_URL = "/media/uploads/"

if DEBUG:
    STATIC_ROOT = os.path.join(PROJECT_ROOT, "site_media", "static")
else:
    STATIC_ROOT = "folder_to_static_files"
STATIC_URL = '/static/'
STATICFILES_DIRS = (
    os.path.join(PROJECT_ROOT, "assets"),
)
STATICFILES_FINDERS = (
    'django.contrib.staticfiles.finders.FileSystemFinder',
    'django.contrib.staticfiles.finders.AppDirectoriesFinder',
)
SECRET_KEY = '...'
TEMPLATE_LOADERS = (
    'django.template.loaders.filesystem.Loader',
    'django.template.loaders.app_directories.Loader',
)
MIDDLEWARE_CLASSES = (
    'django.middleware.common.CommonMiddleware',
    'django.contrib.sessions.middleware.SessionMiddleware',
    'django.middleware.csrf.CsrfViewMiddleware',
    'django.contrib.auth.middleware.AuthenticationMiddleware',
    'django.contrib.messages.middleware.MessageMiddleware',
)
ROOT_URLCONF = 'task2.urls'
WSGI_APPLICATION = 'task2.wsgi.application'
TEMPLATE_DIRS = (
    os.path.join(PROJECT_ROOT, "templates"),
    os.path.join(PROJECT_ROOT, "templates/pages"),
)
INSTALLED_APPS = (
    # Django apps
    'django.contrib.admin',
    'django.contrib.admindocs',
    'django.contrib.auth',
    'django.contrib.contenttypes',
    'django.contrib.gis',
    'django.contrib.humanize',
    'django.contrib.sessions',
    'django.contrib.sites',
    'django.contrib.messages',
    'django.contrib.staticfiles',
    'django.contrib.admin',
    'django.contrib.admindocs',
    # third party apps
    'crispy_forms',
    'django_extensions',
    'easy_thumbnails',
    'registration',
    'south',
    # task2 apps
    'profile',
    'company',
)
AUTHENTICATION_BACKENDS = (
    'django.contrib.auth.backends.ModelBackend',
)
log = DEBUG
if log:
    LOGGING = {
        'version': 1,
        'disable_existing_loggers': True,
        'formatters': {
            'simple': {
                'format': '%(levelname)s %(message)s',
            },
        },
        'handlers': {
            'console': {
                'level': 'DEBUG',
                'class': 'logging.StreamHandler',
                'formatter': 'simple'
            },
        },
        'loggers': {
            'django': {
                'handlers': ['console'],
                'level': 'DEBUG',
            },
        }
    }
####################
# THIRD PARTY SETUPS
# For Crispy Forms
CRISPY_FAIL_SILENTLY = not DEBUG
CRISPY_TEMPLATE_PACK = 'bootstrap'
## For Django Registration
ACCOUNT_ACTIVATION_DAYS = 7
# for Django testing to avoid conflicts with South migrations
SOUTH_TESTS_MIGRATE = False
# Debug_toolbar settings
if DEBUG:
    INTERNAL_IPS = ('127.0.0.1',)
    MIDDLEWARE_CLASSES += (
        'debug_toolbar.middleware.DebugToolbarMiddleware',
    )
    INSTALLED_APPS += (
        'debug_toolbar',
    )

DEBUG_TOOLBAR_PANELS = (
    'debug_toolbar.panels.version.VersionDebugPanel',
    'debug_toolbar.panels.timer.TimerDebugPanel',
    'debug_toolbar.panels.settings_vars.SettingsVarsDebugPanel',
    'debug_toolbar.panels.headers.HeaderDebugPanel',
    # 'debug_toolbar.panels.profiling.ProfilingDebugPanel',
    'debug_toolbar.panels.request_vars.RequestVarsDebugPanel',
    'debug_toolbar.panels.sql.SQLDebugPanel',
    'debug_toolbar.panels.template.TemplateDebugPanel',
    'debug_toolbar.panels.cache.CacheDebugPanel',
    'debug_toolbar.panels.signals.SignalDebugPanel',
    'debug_toolbar.panels.logger.LoggingPanel',
)
DEBUG_TOOLBAR_CONFIG = {
    'INTERCEPT_REDIRECTS': False,
}
# Easy_Thumbnail setup
THUMBNAIL_ALIASES = {
    '': {
        'thumbnail': {'size': (50, 50), 'crop': True},
    },
}
The problem has to do with how you use your views.
I think you are using:
instance.add(many_to_many_instance)
before you have an instance id.
so first save your model:
instance.save()
instance.add(many_to_many_instance)
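Applied to the models in the question, that ordering might look like the following sketch (the Bid instance and user are assumed to exist already; field values are illustrative only):

# Sketch only: save the Company first so it has a primary key,
# then use the related manager on the m2m field (bid_list) to attach bids.
company = Company(user=user, agreed_to_terms=True)
company.save()                 # now company.pk is set

bid = Bid.objects.create(user=user, description="First offer")
company.bid_list.add(bid)      # m2m rows can only be written once company has a pk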
You are using a custom user profile model (AUTH_PROFILE_MODULE = 'profile.UserProfile'), but in the code I suppose you use the native Django user.
I suppose your models should look like:
class Company(models.Model):
    user = models.OneToOneField('profile.UserProfile')
    ...
Read more: https://docs.djangoproject.com/en/1.5/ref/contrib/auth/#django.contrib.auth.models.User.get_profile