insert json file data to sql table using sparksql - sql

Im trying to insert json file data to sql table using sparksql
my sample json file example :
{
"id": "value_string",
"aggregate_id": "value_string",
"type": "value_string",
"timestamp": "value_string",
"data": {
"customer_id": "value_string",
"name": "value_string"
}
}
want to insert in sql table using spark , tried creating like this as shown but couldn't
public class DataOfPerson
{
public string name { get; set; }
public string birthdate { get; set; }
public string customer_id { get; set; }
}
public class Person
{
public string id { get; set; }
public string aggregate_id { get; set; }
public string type { get; set; }
public string timestamp { get; set; }
public List<DataOfPerson> dataOfPerson { get; set; }
}
public class RootObject
{
public Person person { get; set; }
}
var root = JsonConvert.DeserializeObject<RootObject> (sqlContext.read.json(s"abfss://abc#xyz/events.json")

def flattenDataFrame(spark: SparkSession, nestedDf: DataFrame): DataFrame = {
var flatCols = Array.empty[String]
var nestedCols = Array.empty[String]
var flatDF = spark.emptyDataFrame
for (w <- nestedDf.dtypes) {
if (w._2.contains("Struct")) {
nestedCols = nestedCols.:+(w._1)
} else {
flatCols = flatCols.:+(w._1)
}
}
var nestedCol = Array.empty[String]
for (nc <- nestedCols) {
for (c <- nestedDf.select(nc + ".*").columns) {
nestedCol = nestedCol.:+(nc + "." + c)
}
}
val allColumns = flatCols ++ nestedCol
val colNames = allColumns.map(name => col(name))
nestedDf.select(colNames: _*)
}

Related

Cannot deserialize the current JSON object Cannot deserialize the current JSON object with group column

I am calling the API and trying to store the Jsonresult into a IEnumerable model class. The json result has group header column consolidated_weather.
when I run the program the following error is coming
JsonSerializationException: Cannot deserialize the current JSON object into type 'System.Collections.Generic.IEnumerable`.
How can I call the given json result into the IEnumerable model class after avoiding group header from json result.
I am using the following two model.
public class Weather
{
[Key]
public long id { get; set; }
public string weather_state_name { get; set; }
public string weather_state_abbr { get; set; }
public string wind_direction_compass { get; set; }
public DateTime created { get; set; }
public DateTime applicable_date { get; set; }
public decimal min_temp { get; set; }
public decimal max_temp { get; set; }
public decimal the_temp { get; set; }
public double wind_speed { get; set; }
public decimal wind_direction { get; set; }
public decimal air_pressure { get; set; }
public decimal Humidity { get; set; }
public string Visibllity { get; set; }
public decimal Predictability { get; set; }
}
public class WeatherList
{
public IEnumerable<Weather> consolidated_weather { get; set; }
}
public async Task<IEnumerable<Weather>> GetWeatherAsync(string woied)
{
var url = SD.APIBaseUrl + woied;
var request = new HttpRequestMessage(HttpMethod.Get, url);
var client = _clientFactory.CreateClient();
HttpResponseMessage response = await client.SendAsync(request);
IEnumerable<Weather> weather = new List<Weather>();
WeatherList weatherList = new WeatherList();
if (response.StatusCode == System.Net.HttpStatusCode.OK)
{
var jsonString = await response.Content.ReadAsStringAsync();
weatherList = JsonConvert.DeserializeObject<WeatherList>(jsonString);
return weatherList;
}
return null;
}
The API result is coming as
<!-- language: lang-html -->
{
"consolidated_weather": [
{
"id": 4577625064341504,
"weather_state_name": "Heavy Rain",
"weather_state_abbr": "hr",
"wind_direction_compass": "WSW",
"created": "2020-07-14T19:35:14.577740Z",
"applicable_date": "2020-07-14",
"min_temp": 11.11,
"max_temp": 15.05,
"the_temp": 14.32,
"wind_speed": 6.570953330777592,
"wind_direction": 254.13274105640758,
"air_pressure": 1016.5,
"humidity": 85,
"visibility": 7.654361031575599,
"predictability": 77
},
{
"id": 4896540210495488,
"weather_state_name": "Showers",
"weather_state_abbr": "s",
"wind_direction_compass": "WNW",
"created": "2020-07-14T19:35:17.569534Z",
"applicable_date": "2020-07-15",
"min_temp": 12.31,
"max_temp": 17.03,
"the_temp": 16.509999999999998,
"wind_speed": 7.600821124862802,
"wind_direction": 284.49357944800784,
"air_pressure": 1015.5,
"humidity": 82,
"visibility": 13.558008729022509,
"predictability": 73
},
]
"title": "Texas",
"location_type": "City",
"timezone": "US"
<!-- end snippet -->
From your json string, we can see that there is a list of weather object in object consolidated_weather.
So you need to parse json to WeatherList.
public class WeatherList {
public IEnumerable<Weather> consolidated_weather { get; set; }
}
[HttpPost("/weather")]
public async Task<IEnumerable<Weather>> GetWeatherAsync()
{
string jsonString = "{\"consolidated_weather\":[{\"id\":4577625064341504,\"weather_state_name\":\"Heavy Rain\",\"weather_state_abbr\":\"hr\",\"wind_direction_compass\":\"WSW\",\"created\":\"2020-07-14T19:35:14.577740Z\",\"applicable_date\":\"2020-07-14\",\"min_temp\":11.11,\"max_temp\":15.05,\"the_temp\":14.32,\"wind_speed\":6.570953330777592,\"wind_direction\":254.13274105640758,\"air_pressure\":1016.5,\"humidity\":85,\"visibility\":7.654361031575599,\"predictability\":77},{\"id\":4896540210495488,\"weather_state_name\":\"Showers\",\"weather_state_abbr\":\"s\",\"wind_direction_compass\":\"WNW\",\"created\":\"2020-07-14T19:35:17.569534Z\",\"applicable_date\":\"2020-07-15\",\"min_temp\":12.31,\"max_temp\":17.03,\"the_temp\":16.509999999999998,\"wind_speed\":7.600821124862802,\"wind_direction\":284.49357944800784,\"air_pressure\":1015.5,\"humidity\":82,\"visibility\":13.558008729022509,\"predictability\":73}]}";
IEnumerable<Weather> weather = new List<Weather>();
WeatherList weatherList = new WeatherList();
weatherList = JsonConvert.DeserializeObject<WeatherList>(jsonString);
return weatherList.consolidated_weather;
}
Test Result
UPDATE
public class WeatherList
{
public IEnumerable<Weather> consolidated_weather { get; set; }
public string title { get; set; }
public string location_type { get; set; }
public string timezone { get; set; }
}
There are two missings in your json string:
<!-- language: lang-html -->
{
"consolidated_weather": [
{
"id": 4577625064341504,
...
},
{
"id": 4896540210495488,
...
},
] <!-- missing , -->
"title": "Texas",
"location_type": "City",
"timezone": "US"
<!-- missing } -->
<!-- end snippet -->
Assuming that you cannot change the content of the Json, you can create a ConsolidatedWeather class that contains the List of Weathers
public class ConsolidatedWeather
{
public List<Weather> Consolidated_Weather { get; set; }
}
and in your deserialize part
return JsonConvert.DeserializeObject<ConsolidatedWeather>(jsonString).Consolidated_Weather;

Asp.net Core does not serialize my new type

Am using .net core +angular 5, and trying to return a list, but one field is null in JSON response. Am using Postman to trigger debugging and saw in VS that the field has a value coming from the DB.
Don't know why it doesn't in the JSON response.
[HttpGet("[action]")]
public IEnumerable<HikingTrail> HikingTrails()
{
var dbOptions = new DbContextOptionsBuilder<HikingTrailContext>();
dbOptions.UseSqlServer("Server = (localdb)\\mssqllocaldb; Database = HikingApp");
var dbContext = new DAO.HikingTrailContext(dbOptions.Options);
return dbContext.HikingTrails.ToList();
}
This returns:
I'm interested in the "mountainRange" field not being null. In debug window it has the right value.
{
"url": null,
"hikingTrailId": 159,
"mountainRange": null,
"name": "My custom name",
"startPoint": null,
"endPoint": null,
"trailCheckpoints": null,
"type": 2,
"dificulty": null,
"duration": "2 1/2 - 3 h",
"minDuration": "00:00:00",
"maxDuration": "00:00:00",
"seasonality": "mediu",
"equipmentLevel": null,
"trailMarking": null,
"hasTrailType": false
},
I was thinking maybe it's EF Core, and have made this 2nd try (i.e. added Include() to dbContext query):
[HttpGet("[action]")]
public IEnumerable<HikingTrail> HikingTrails()
{
var dbOptions = new DbContextOptionsBuilder<HikingTrailContext>();
dbOptions.UseSqlServer("Server = (localdb)\\mssqllocaldb; Database = HikingApp");
var dbContext = new DAO.HikingTrailContext(dbOptions.Options);
return dbContext.HikingTrails.Include( x => x.MountainRange).ToList();
}
Could not get any response in Postman.
EDIT:
public class HikingTrailContext : DbContext
{
public HikingTrailContext(DbContextOptions<HikingTrailContext> options) : base(options)
{
}
public HikingTrailContext():base(){
}
public DbSet<HikingTrail> HikingTrails { get; set; }
public DbSet<MountainRange> MountainRanges { get; set; }
public DbSet<TrailScrapingSessionInfo> TrailScrapingHistory { get; set; }
protected override void OnModelCreating(ModelBuilder modelBuilder)
{
}
}
public class HikingTrail
{
[Key]
public int HikingTrailId { get; set; }
public HikingTrail() { }
public MountainRange MountainRange { get; set; }
public String Name { get; set; }
public Location StartPoint { get; set; }
public Location EndPoint { get; set; }
public List<Location> TrailCheckpoints { get; }
public TrailType Type => TrailType.Undetermined;
public String Dificulty { get; set; }
public String Duration { get; set; }
public TimeSpan MinDuration { get; set; }
public TimeSpan MaxDuration { get; set; }
public String Seasonality { get; set; }
public String EquipmentLevel { get; set; }
public String TrailMarking { get; set; }
public String URL;
public bool HasTrailType
{
get
{
return this.Type != TrailType.Undetermined;
}
}
public override bool Equals(object obj)
{
return (((HikingTrail)obj).Name == this.Name);
}
public override int GetHashCode()
{
int hash = 17;
// Suitable nullity checks etc, of course :)
hash = hash * 23 + Name.GetHashCode();
hash = hash * 23 + Type.GetHashCode();
hash = hash * 23 + StartPoint.GetHashCode();
return hash;
}
public override string ToString()
{
return Name.ToString();
}
}
EDIT :
I profiled the db on dbContext.HikingTrails.Include(x => x.MountainRange).Where(x => x.MountainRange != null).ToList(); and the generated query is OK, meaning it has a Name column for MountainRange as well.
P.S.: several fields are null, but those ones don't have data yet.
found one solution, Projection to an anonymous type. Also had to be careful not to have two fields with the same name of "Name"
[HttpGet("[action]")]
public dynamic HikingTrails3()
{
var dbOptions = new DbContextOptionsBuilder<HikingTrailContext>();
dbOptions.UseSqlServer("Server = (localdb)\\mssqllocaldb; Database = HikingApp");
var dbContext = new DAO.HikingTrailContext(dbOptions.Options);
var trails = dbContext.HikingTrails.Include(x => x.MountainRange).
Select( i =>new { Name= i.Name, MountainRangeName = i.MountainRange.Name, i.Duration,
i.Dificulty,i.EquipmentLevel, i.Seasonality, i.Type }).ToList();
return trails;
}

Asp.net core 2 webapi post related data insert

How can I tell EF Core to not insert related data on post ?
this are my models for Deposito and Sucursal
public class IDeposito
{
[Key]
public int Id { get; set; }
public string Descrip { get; set; }
public string Alias { get; set; }
public Boolean Activo { get; set; }
public ISucursal Sucursal { get; set; }
}
public class ISucursal
{
[Key]
public int Id { get; set; }
public string Descrip { get; set; }
public string Alias { get; set; }
public Boolean Activo { get; set; }
}
This is my controller
[HttpPost]
public IActionResult Create([FromBody] IDeposito postData)
{
if (postData == null)
{
return BadRequest();
}
_context.Depositos.Add(postData);
_context.SaveChanges();
return CreatedAtRoute("GetDeposito", new { ID = postData.Id }, postData);
}
When I post this model
{
"Descrip": "Neuquen",
"Alias": "NQN",
"Activo": true,
"Sucursal": {
"Id": 1,
"Descrip": "Comodoro Rivadavia",
"Alias": "CR",
"Activo": true
}
}
It fails because it tries to insert into table "Sucursal", although there already exists a "Sucursal" with id: 1.
Is there anyway I can tell EF Core to not update the related tables ? thank you very much
First approach if you want connect new IDeposito to existing ISucursal with Id = 1 then use:
postData.Sucursal = _context.Sucursals.Find(postData.Sucursal.Id);
_context.Depositos.Add(postData);
_context.SaveChanges();
return CreatedAtRoute("GetDeposito", new { ID = postData.Id }, postData);
If you want add only IDeposito without any sucursal:
postData.Sucursal = null;
_context.Depositos.Add(postData);
_context.SaveChanges();
return CreatedAtRoute("GetDeposito", new { ID = postData.Id }, postData);

ASP.NET MVC query lambda expression

Hello I have problem in one query. Why it's always return no value.
public List<UserDetail> userSearchModel(UserSearchModel searchModel)
{
string userid = User.Identity.GetUserId();
var user = _dbContext.UserDetails.Where(x => x.Id == userid);
var result = _dbContext.UserDetails.Except(user).ToList().AsQueryable();
if (searchModel != null)
{
if (searchModel.LanguageId.Count() != 0)
{
List<UserDetailLanguage> usrDetails = new List<UserDetailLanguage>();
foreach (var item in searchModel.LanguageId)
{
var details = _dbContext.UserDetailLanguages.Where(x => x.LanguageId == item).ToList();
foreach (var item2 in details)
{
usrDetails.Add(item2);
}
}
result = result.Where(x => x.UserDetailLanguages == usrDetails);
}
}
return result.ToList();
}
I want to get results which are the same in usrDetails list and in result.UserDetailLanguages.
In result.UserDetailLanguages I have record equals to record in usrDetails but this not want retrieve.
Here is my model:
public class UserDetail
{
public UserDetail()
{
this.UserDetailLanguages = new HashSet<UserDetailLanguage>();
}
[Key, ForeignKey("User")]
public string Id { get; set; }
public DateTime Birthday { get; set; }
public string Sex { get; set; }
public string Country { get; set; }
public string About { get; set; }
[NotMapped]
public int Age { get { return DateTime.Now.Year - Birthday.Year; } }
public virtual ApplicationUser User { get; set; }
public virtual ICollection<UserDetailLanguage> UserDetailLanguages { get; set; }
}
public class UserDetailLanguage
{
public Int32 Id { get; set; }
public virtual UserDetail UserDetail { get; set; }
public string UserDetailId { get; set; }
public virtual Language Language { get; set; }
public Int32 LanguageId { get; set; }
public Boolean IsKnown { get; set; }
public static implicit operator List<object>(UserDetailLanguage v)
{
throw new NotImplementedException();
}
}
public class Language
{
public Language()
{
this.UserDetailLanguages = new HashSet<UserDetailLanguage>();
}
public int Id { get; set; }
public string Value { get; set; }
public string Name { get; set; }
public virtual ICollection<UserDetailLanguage> UserDetailLanguages { get; set; }
}
What I'm doing wrong?
If you want to see if your value is in a list you use the Contains function of the list -- like this:
result = result.Where(x => usrDetails.Contains(x.UserDetailLanguage));
If you want to see if there are any items in both lists you can use intersection like this:
result = result.Where(x => usrDetails.Intersect(x.UserDetailLanguage).Count() > 0);
Looks like you are checking equality between lists in following code
result = result.Where(x => x.UserDetailLanguages == usrDetails);
This might not work, to check equality for lists you can use something like
Enumerable.SequenceEqual(FirstList.OrderBy(fList => fList),
SecondList.OrderBy(sList => sList))

Box V2 API - Where is lock/unlock?

There is a note in the developer road map from December of 2013 saying, "Lock/Unlock – We’ve added support for locking and unlocking files into the V2 API."
I've been all through the V2 API (for c#) and cannot find it anywhere. I expected to find something in the BoxFilesManager class or as something you would pass to UpdateInformationAsync within the BoxFileRequest class.
So is there a way to lock/unlock a file?
Great question. In order to see the current lock status of a file do a
GET https://api.box.com/2.0/files/7435988481/?fields=lock
If there is no lock on the file, you'll get something like this back:
{
"type": "file",
"id": "7435988481",
"etag": "0",
"lock": null
}
If you want to lock a file, you need to do a PUT (update) on the /files/ endpoint with a body that tells us what type of lock, and when to release it. Like this:
PUT https://api.box.com/2.0/files/7435988481/?fields=lock
{"lock": {
"expires_at" : "2014-05-29T19:03:04-07:00",
"is_download_prevented": true
}
}
You'll get a response confirming your lock was created:
{
"type": "file",
"id": "7435988481",
"etag": "1",
"lock": {
"type": "lock",
"id": "14516545",
"created_by": {
"type": "user",
"id": "13130406",
"name": "Peter Rexer gmail",
"login": "prexer#gmail.com"
},
"created_at": "2014-05-29T18:03:04-07:00",
"expires_at": "2014-05-29T19:03:04-07:00",
"is_download_prevented": true
}
}
Since there isn't a lock/unlock yet, I created a Lock Manager based on the existing managers:
class BoxCloudLockManager : BoxResourceManager
{
#region Lock/Unlock Classes
[DataContract]
internal class BoxLockRequestInfo
{
[DataMember(Name = "status")]
public string Status { get; set; }
//[DataMember(Name = "expires_at")]
//public string ExpiresAt { get; set; }
[DataMember(Name = "is_download_prevented")]
public bool IsDownloadPrevented { get; set; }
}
[DataContract]
internal class BoxLockRequest
{
[DataMember(Name = "lock")]
public BoxLockRequestInfo Lock { get; set; }
}
#endregion
const string LockFileString = "{0}/?fields=lock";
public BoxCloudLockManager(IBoxConfig config, IBoxService service, IBoxConverter converter, IAuthRepository auth)
: base(config, service, converter, auth)
{
}
public async Task<BoxLockInfo> LockAsync(string documentId,bool isDownloadPrevented = true)
{
var lockRequest = new BoxLockRequest { Lock = new BoxLockRequestInfo { Status = "lock", IsDownloadPrevented = isDownloadPrevented } };
BoxRequest request = new BoxRequest(_config.FilesEndpointUri, string.Format(LockFileString, documentId))
.Method(RequestMethod.Put)
.Payload(_converter.Serialize(lockRequest));
IBoxResponse<BoxLockInfo> response = await ToResponseAsync<BoxLockInfo>(request).ConfigureAwait(false);
return response.ResponseObject;
}
public async Task<BoxLockInfo> UnlockAsync(string documentId)
{
BoxRequest request = new BoxRequest(_config.FilesEndpointUri, string.Format(LockFileString, documentId))
.Method(RequestMethod.Put)
.Payload("{\"lock\":null}");
IBoxResponse<BoxLockInfo> response = await ToResponseAsync<BoxLockInfo>(request).ConfigureAwait(false);
return response.ResponseObject;
}
public async Task<BoxLockInfo> GetLockInfoAsync(string documentId)
{
BoxRequest request = new BoxRequest(_config.FilesEndpointUri, string.Format(LockFileString, documentId))
.Method(RequestMethod.Get);
IBoxResponse<BoxLockInfo> response = await ToResponseAsync<BoxLockInfo>(request).ConfigureAwait(false);
return response.ResponseObject;
}
}
I derived a class from BoxClient, adding a LockManager and instantiate it within the Constructor.
Here is the Lock Info:
[DataContract]
public class BoxLockedBy
{
[DataMember(Name = "type")]
public string Type { get; set; }
[DataMember(Name = "id")]
public string Id { get; set; }
[DataMember(Name = "name")]
public string Name { get; set; }
[DataMember(Name = "login")]
public string Login { get; set; }
}
[DataContract]
public class BoxLockDetails
{
[DataMember(Name = "type")]
public string Type { get; set; }
[DataMember(Name = "id")]
public string Id { get; set; }
[DataMember(Name = "created_by")]
public BoxLockedBy CreatedBy { get; set; }
[DataMember(Name = "created_at")]
public string CreatedAt { get; set; }
[DataMember(Name = "expires_at")]
public string ExpiresAt { get; set; }
[DataMember(Name = "is_download_prevented")]
public bool IsDownloadPrevented { get; set; }
}
[DataContract]
public class BoxLockInfo
{
[DataMember(Name = "type")]
public string Type { get; set; }
[DataMember(Name = "id")]
public string Id { get; set; }
[DataMember(Name = "etag")]
public string Etag { get; set; }
[DataMember(Name = "lock")]
public BoxLockDetails LockDetails { get; set; }
}