How can I find thread ID using WinDBG critical section and thread list? - locking

I analyzed dump file to find locked thread, and I finded using !cs command.
DebugInfo = 0x00639610
Critical section = 0x03e210c8 (+0x3E210C8)
LOCKED
LockCount = 0x1
WaiterWoken = No
OwningThread = 0x000017c8
RecursionCount = 0x1
LockSemaphore = 0x5B0
SpinCount = 0x00000000
This result means that ThreadID 0x000017c8 get the critical section. So I liste d all thread to find threadID 0x000017c8 using ~ command.
0:000> ~
. 0 Id: 2240.1758 Suspend: 0 Teb: 7efdd000 Unfrozen
1 Id: 2240.1d90 Suspend: 0 Teb: 7efda000 Unfrozen
2 Id: 2240.16fc Suspend: 0 Teb: 7efd7000 Unfrozen
3 Id: 2240.1544 Suspend: 0 Teb: 7ef9c000 Unfrozen
4 Id: 2240.2550 Suspend: 0 Teb: 7ef99000 Unfrozen
5 Id: 2240.fd4 Suspend: 0 Teb: 7ef96000 Unfrozen
6 Id: 2240.1b08 Suspend: 0 Teb: 7ef93000 Unfrozen
7 Id: 2240.1958 Suspend: 0 Teb: 7ef90000 Unfrozen
8 Id: 2240.20e8 Suspend: 0 Teb: 7ef8d000 Unfrozen
9 Id: 2240.1bec Suspend: 0 Teb: 7ef8a000 Unfrozen
10 Id: 2240.fb4 Suspend: 0 Teb: 7ef87000 Unfrozen
11 Id: 2240.25c0 Suspend: 0 Teb: 7ef84000 Unfrozen
12 Id: 2240.15b0 Suspend: 0 Teb: 7ef81000 Unfrozen
13 Id: 2240.21a8 Suspend: 0 Teb: 7ef7b000 Unfrozen
14 Id: 2240.1fcc Suspend: 0 Teb: 7ef78000 Unfrozen
But, There is no threadID 0x000017c8. What happened this dump file? How can i find thread id 0x000017c8?

Kjell Gunnar is right: thread 17c8 has terminated for whatever reason.
This can not only happen with critical sections, it can also happen for other synchronization objects. In my advanced debugging training, I provide the participants a dump with a similar situation for a ManualResetEvent.
Here's the source:
#include "stdafx.h"
#include <Windows.h>
#include <process.h>
#include <stdio.h>
#include <iostream>
HANDLE threadA;
HANDLE threadB;
HANDLE eventB;
class WorkItem
{
public:
virtual void Initialize()
{
}
};
unsigned int __stdcall initializeWorkitems(void* param)
{
try
{
// Initialize workitems
WorkItem **items = new WorkItem*[2];
items[0] = new WorkItem();
items[1] = NULL;
for (int i = 0; i<2; i++)
{
items[i]->Initialize();
}
// Signal event for second thread to work on work items
SetEvent(eventB);
}
catch(...)
{
// Don't do this
}
return 0;
}
unsigned int __stdcall processWorkitems(void* param)
{
// Wait for work item initialization to complete
WaitForSingleObject(eventB, INFINITE);
// Work on workitems
Sleep(100);
return 0;
}
int _tmain(int argc, _TCHAR* argv[])
{
eventB = CreateEvent(0, 0, 0, 0);
threadA = (HANDLE)_beginthreadex(0, 0, &initializeWorkitems, (void*)0, 0, 0);
threadB = (HANDLE)_beginthreadex(0, 0, &processWorkitems, (void*)0, 0, 0);
WaitForSingleObject(threadA, INFINITE);
WaitForSingleObject(threadB, INFINITE);
CloseHandle(threadA);
CloseHandle(threadB);
CloseHandle(eventB);
return 0;
}
Create a dump at the time the process hangs. The analysis should result in a case comparable to your critical section. threadA terminates silenty, eating a NullPointerException in the catch(...) block, such never freeing the manual reset event.

Related

Servo motor won't work for a specific code

I have written a code with interrupts to control the servo but my servos are not working. One is meant to work with interrupt but the other one has to simply move but it is also not working. Even the serial window shows that the code is working properly ut the servos aren't moving I have checked both my servos with same connections and Sweep example of Arduino and both work fine.
#include <TimerOne.h> // Header file for TimerOne library
#include <Servo.h>
#define trigPin 12 // Pin 12 trigger output
#define echoPin 2 // Pin 2 Echo input
#define echo_int 0 // Interrupt id for echo pulse
#define TIMER_US 50 // 50 uS timer duration
#define TICK_COUNTS 4000 // 200 mS worth of timer ticks
volatile long echo_start = 0; // Records start of echo pulse
volatile long echo_end = 0; // Records end of echo pulse
volatile long echo_duration = 0; // Duration - difference between end and start
volatile int trigger_time_count = 0; // Count down counter to trigger pulse time
volatile long range_flasher_counter = 0; // Count down counter for flashing distance LED
int sound = 250;
Servo servo1; //Servos
Servo servo2;
const int button1 = 10; //Buttons
const int button2 = 8;
const int button3 = 13;
const byte interruptPin = 3;
int pos;
void setup() {
servo1.attach(9); // servo for arm
servo2.attach(5); // servo for base
pinMode(trigPin, OUTPUT); // Trigger pin set to output
pinMode(echoPin, INPUT); // Echo pin set to input
// Onboard LED pin set to output
Timer1.initialize(TIMER_US); // Initialise timer 1
Timer1.attachInterrupt( timerIsr ); // Attach interrupt to the timer service routine
attachInterrupt(echo_int, echo_interrupt, CHANGE);
pinMode(interruptPin, INPUT_PULLUP);
attachInterrupt(digitalPinToInterrupt(interruptPin),Metal_detected, HIGH);
pinMode(button1, INPUT);
pinMode(button2, INPUT);
Serial.begin(9600);
}
void loop() {
// put your main code here, to run repeatedly:
servo1.write(0); // These will make the servos move to the mapped angles
servo2.write(90);
distance_checking();
if(digitalRead(button1) == HIGH)
{
while(digitalRead(button2) == LOW)
{
Serial.println("Entering Sweeping mode");
for (pos = 30; pos <= 150; pos += 1)
{ Serial.print("Angle is :");
Serial.println(pos);
servo2.write(pos);
distance_checking();
//delay(0.1); // waits 15ms for the servo to reach the position
if(digitalRead(button2) == HIGH)
{
Serial.print("Exiting Sweeping");
goto label;}
}
for (pos = 150; pos >= 30; pos -= 1) { // goes from 180 degrees to 0 degree
Serial.print("Angle is :");
Serial.println(pos);
servo2.write(pos); // tell servo to go to position in variable 'pos'
distance_checking();
//delay(0.1); // waits 15ms for the servo to reach the position
if(digitalRead(button2) == HIGH)
{
goto label;
Serial.print("Exiting Sweeping");}
}
}
}
//reset th
label:
if(digitalRead(button2) == HIGH){
servo1.write(0);
servo2.write(90);
Serial.println("press the sweeping button to enter sweeeping mode");
delay(300);
}
}
void distance_checking()
{
if (echo_duration/58 <= 20)
{
Serial.println("the servo angle is 30");
servo1.write(30);
delay(1500);
}
else {
servo1.write(0);
}
delay(500);
}
void Metal_detected()
{if(digitalRead(button2) == LOW)
{delay(5000);
Serial.print("Metal detected at servo angle:");
Serial.println(servo2.read());
servo1.write(servo1.read());
servo2.write(servo2.read());
Serial.println("Motion is stopped");
Serial.println("Press reset to go to the home position");
}
//while(digitalRead(button2) == HIGH)
// {
// Serial.print("Reseting");
// return 0;}
}
void timerIsr()
{
trigger_pulse(); // Schedule the trigger pulses
// Flash the onboard LED distance indicator
}
// --------------------------
// trigger_pulse() called every 50 uS to schedule trigger pulses.
// Generates a pulse one timer tick long.
// Minimum trigger pulse width for the HC-SR04 is 10 us. This system
// delivers a 50 uS pulse.
// --------------------------
void trigger_pulse()
{
static volatile int state = 0; // State machine variable
if (!(--trigger_time_count)) // Count to 200mS
{ // Time out - Initiate trigger pulse
trigger_time_count = TICK_COUNTS; // Reload
state = 1; // Changing to state 1 initiates a pulse
}
switch(state) // State machine handles delivery of trigger pulse
{
case 0: // Normal state does nothing
break;
case 1: // Initiate pulse
digitalWrite(trigPin, HIGH); // Set the trigger output high
state = 2; // and set state to 2
break;
case 2: // Complete the pulse
default:
digitalWrite(trigPin, LOW); // Set the trigger output low
state = 0; // and return state to normal 0
break;
}
}
// --------------------------
// echo_interrupt() External interrupt from HC-SR04 echo signal.
// Called every time the echo signal changes state.
//
// Note: this routine does not handle the case where the timer
// counter overflows which will result in the occassional error.
// --------------------------
void echo_interrupt()
{
switch (digitalRead(echoPin)) // Test to see if the signal is high or low
{
case HIGH: // High so must be the start of the echo pulse
echo_end = 0; // Clear the end time
echo_start = micros(); // Save the start time
break;
case LOW: // Low so must be the end of hte echo pulse
echo_end = micros(); // Save the end time
echo_duration = echo_end - echo_start; // Calculate the pulse duration
break;
}
}
I tested my motors with this code one by one and they worked absolutely fine:
#include <Servo.h>
Servo myservo; // create servo object to control a servo
// twelve servo objects can be created on most boards
int pos = 0; // variable to store the servo position
void setup() {
myservo.attach(9); // attaches the servo on pin 9 to the servo object
}
void loop() {
for (pos = 0; pos <= 180; pos += 1) { // goes from 0 degrees to 180 degrees
// in steps of 1 degree
myservo.write(pos); // tell servo to go to position in variable 'pos'
delay(15); // waits 15ms for the servo to reach the position
}
for (pos = 180; pos >= 0; pos -= 1) { // goes from 180 degrees to 0 degrees
myservo.write(pos); // tell servo to go to position in variable 'pos'
delay(15); // waits 15ms for the servo to reach the position
}
}

Guru Meditation Error: Core 0 panic'ed (LoadProhibited),(Interrupt wdt timeout on CPU1)

I was trying to record light intensity values for a Li-Fi project.
This is my code:
#include <soc/sens_reg.h>
#include <soc/sens_struct.h>
#include <driver/adc.h>
#include <SD.h>
#define ADC1_GPIO36_CHANNEL ADC1_CHANNEL_0
#include <adc_channel.h>
const char filename1[] = "/part1.dat";
const char filename2[] = "/part2.dat";
File file1, file2;
int local_adc1_read(int channel) {
uint16_t adc_value;
SENS.sar_meas_start1.sar1_en_pad = (1 << channel); // Only one channel is selected
while (SENS.sar_slave_addr1.meas_status != 0)
;
SENS.sar_meas_start1.meas1_start_sar = 0;
SENS.sar_meas_start1.meas1_start_sar = 1;
while (SENS.sar_meas_start1.meas1_done_sar == 0)
;
adc_value = SENS.sar_meas_start1.meas1_data_sar;
return adc_value;
}
#define ADC_SAMPLES_COUNT 1000
int16_t abuf[ADC_SAMPLES_COUNT];
int16_t abufPos = 0;
portMUX_TYPE DRAM_ATTR timerMux = portMUX_INITIALIZER_UNLOCKED;
TaskHandle_t complexHandlerTask;
hw_timer_t * adcTimer = NULL; // Our timer
void complexHandler(void *param) {
timerAlarmDisable(adcTimer);
timerDetachInterrupt(adcTimer);
timerEnd(adcTimer);
adcTimer = NULL;
file1 = SD.open(filename1, FILE_WRITE);
file2 = SD.open(filename2, FILE_WRITE);
file1.write((const byte*)abuf, abufPos);
file2.write((const byte*)abuf, abufPos);
file1.close();
file2.close();
}
int counter;
void IRAM_ATTR onTimer() {
portENTER_CRITICAL_ISR(&timerMux);
abuf[abufPos++] = local_adc1_read(ADC1_CHANNEL_0);
//abuf[abufPos++] = adc1_get_raw(ADC1_CHANNEL_0);
if (abufPos >= 8) {
if (abuf[abufPos-7] ==
abuf[abufPos-6] ==
abuf[abufPos-5] ==
abuf[abufPos-4] ==
abuf[abufPos-3] ==
abuf[abufPos-2] ==
abuf[abufPos-1] ==
abuf[abufPos])
{
// Notify adcTask that the buffer is full.
BaseType_t xHigherPriorityTaskWoken = pdFALSE;
vTaskNotifyGiveFromISR(complexHandlerTask, &xHigherPriorityTaskWoken);
if (xHigherPriorityTaskWoken) {
portYIELD_FROM_ISR();
}
}
portEXIT_CRITICAL_ISR(&timerMux);
}
}
void setup() {
setCpuFrequencyMhz(240);
xTaskCreate(complexHandler, "Handler Task", 8192, NULL, 1, &complexHandlerTask);
adcTimer = timerBegin(3, 80, true); // 80 MHz / 80 = 1 MHz hardware clock for easy figuring
timerAttachInterrupt(adcTimer, &onTimer, true); // Attaches the handler function to the timer
timerAlarmWrite(adcTimer, 100, true); // Interrupts when counter == 45, i.e. 22.222 times a second
timerAlarmEnable(adcTimer);
Serial.begin(115200);
pinMode(2, OUTPUT);
//pinMode(36, INPUT);
if (!SD.begin())
Serial.println("SD begin failed");
while(!SD.begin()) {
Serial.print(".");
//delay(500);
SD.remove(filename1);
SD.remove(filename2);
}
}
void loop() {
}
I got this error:
Guru Meditation Error: Core 0 panic'ed (LoadProhibited). Exception was unhandled.
rst:0xc (SW_CPU_RESET),boot:0x13 (SPI_FAST_FLASH_BOOT)
configsip: 0, SPIWP:0xee
clk_drv:0x00,q_drv:0x00,d_drv:0x00,cs0_drv:0x00,hd_drv:0x00,wp_drv:0x00
mode:DIO, clock div:1
load:0x3fff0018,len:4
load:0x3fff001c,len:1100
load:0x40078000,len:10900
load:0x40080400,len:6388
entry 0x400806b4
Guru Meditation Error: Core 0 panic'ed (LoadProhibited). Exception was unhandled.
Core 0 register dump:
PC : 0x400f095f PS : 0x00060030 A0 : 0x800d1019 A1 : 0x3ffb3f80
A2 : 0x00000000 A3 : 0x3ffb2080 A4 : 0x00000020 A5 : 0x80000020
A6 : 0x00000000 A7 : 0x00000000 A8 : 0x00000005 A9 : 0x00000020
A10 : 0x00000020 A11 : 0x3ffbc0d0 A12 : 0x80087259 A13 : 0x3ffbc0d0
A14 : 0x00000000 A15 : 0x00000000 SAR : 0x00000000 EXCCAUSE: 0x0000001c
EXCVADDR: 0x00000000 LBEG : 0x00000000 LEND : 0x00000000 LCOUNT : 0x00000000
ELF file SHA256: 0000000000000000
Backtrace: 0x400f095f:0x3ffb3f80 0x400d1016:0x3ffb3fa0 0x40088269:0x3ffb3fe0
I tried decreasing the interrupt frequency, using a huge app (3 MB), increasing the CPU clock to 240 Hz, but nothing changed.
By outcommenting
vTaskNotifyGiveFromISR(complexHandlerTask, &xHigherPriorityTaskWoken);
and
xTaskCreate(complexHandler, "Handler Task", 8192, NULL, 1, &complexHandlerTask);
now the error is
Guru Meditation Error: Core 1 panic'ed (Interrupt wdt timeout on CPU1)
And some register dumps of core 1 and core 0. There isn't any change doing something in the loop.

Sensor reading communication via bluetooth

I have two bluetooth modules(HC05) connected to separate arduinos. One acting as master and other as slave. One LDR is connected to the slave part which will be taking continuous readings and sending it to master via bluetooth.
The modules are successfully paired.I could even control an led connected to master using a pushbutton connected to slave.
Since 4 days I am struggling to get the readings of LDR on the serial monitor of master.
The slave part of the project(having the LDR):
#include <SoftwareSerial.h>
SoftwareSerial BTSerial(10, 11); // RX | TX
#define ldrPin A0
int ldrValue = 0;
void setup() {
pinMode(9, OUTPUT); // this pin will pull the HC-05 pin 34 (key pin) HIGH to switch module to AT mode
digitalWrite(9, HIGH);
pinMode(ldrPin, INPUT);
BTSerial.begin(9600);
Serial.begin(9600);
}
void loop()
{
ldrValue = analogRead(ldrPin);
BTSerial.println(ldrValue);
Serial.println(ldrValue);
delay(1000);
}
The master part of the project which will be getting the reaings and displaying on serial monitor:
#include <SoftwareSerial.h>
SoftwareSerial BTSerial(10, 11); // RX | TX
const byte numChars = 1024;
char receivedChars[numChars]; // an array to store the received data
boolean newData = false;
void setup() {
pinMode(9, OUTPUT); // this pin will pull the HC-05 pin 34 (key pin) HIGH to switch module to AT mode
digitalWrite(9, HIGH);
BTSerial.begin(9600);
Serial.begin(9600);
Serial.println("<Arduino is ready>");
}
void loop() {
recvWithEndMarker();
showNewData();
}
void recvWithEndMarker() {
static byte ndx = 0;
char endMarker = '\n';
char rc;
while (BTSerial.available() > 0 && newData == false) {
rc = BTSerial.read();
if (rc != endMarker) {
receivedChars[ndx] = rc;
ndx++;
if (ndx >= numChars) {
ndx = numChars - 1;
}
}
else {
receivedChars[ndx] = '\0'; // terminate the string
ndx = 0;
newData = true;
}
}
}
void showNewData() {
if (newData == true) {
Serial.print("This just in ... ");
Serial.println(receivedChars);
newData = false;
}
}
But the problem is that in the serial monitor only the highest digit ( 3 in 392) is displayed in the serial monitor. The readings are correct but the complete readings are not displayed.
The serial monitor showed something like this:
<Arduino is ready>
This just in ... 1
This just in ... 1
This just in ... 1
This just in ... 1
This just in ... 1
This just in ... 3
This just in ... 3
This just in ... 3
This just in ... 3
This just in ... 3
Ifin the Slave part instead of LDR readings if I am sending a string "hello", then it is printing as :
<Arduino is ready>
This just in ... h
This just in ... h
This just in ... h
This just in ... h
This just in ... h
This just in ... h
I have referred this link for serial communications Serial input basics
Can someone please help me out as I am new to arduino.
To read a string directly into a variable you can use:
BTSerial.readString()
instead of:
BTSerial.read()
like in the official documentation

read timeout on second attempt following libusb_close( )

My linux application can use libusb to perform many IO operations to/from the device. However, if I close and then re-open access to the device, all read operations will timeout. I'm assuming that my close code is not complete but I can't find my bug.
The example program has these functions:
deviceCount( ) which returns the number of devices matching my VID/PID
deviceOpen( ) to open a connection to my device
deviceClose( ) to close the connection to my device and "clean-up"
deviceWrite( ) to write a specific number of bytes to my device
deviceRead( ) to read a specific number of bytes from my device
deviceReadRegisters( ) a function specific to my device that does a write followed by a read
runTest( ) a function to call all of the above in a reasonable sequence
main( ) the main function which tries to call runTest( ) multiple times.
Note that after running the program and getting the timeout error, I need to power-cycle my device to recover. Re-running my test program without power-cycling is not sufficient. It will then fail on the first call to runTest( ).
In the current implementation, I only call libusb_init( ) and libusb_exit( ) once. I've also tried calling these function each time through the runTest( ) function. This didn't make any difference.
I'm guessing that I'm doing something wrong in my deviceClose( ) function. Any insights would be greatly appreciated.
Thanks in advance! - Jeff
####### Example output
main() attempting to runTest(), i: 0
runTest() Starting!
runTest() Success calling libusb_init()
runTest() Number of devices: 1
deviceOpen() Number of alternate settings: 1
deviceOpen() Interface number: 0 number of endpoints: 5
deviceOpen() Descriptor type: 5 EP Address: 0x02 2
deviceOpen() Found outPort! outPort: 2 index: 0
deviceOpen() Descriptor type: 5 EP Address: 0x84 132
deviceOpen() Descriptor type: 5 EP Address: 0x86 134
deviceOpen() Found inPort! inPort: 134 index: 2
deviceOpen() Descriptor type: 5 EP Address: 0x01 1
deviceOpen() Descriptor type: 5 EP Address: 0x81 129
deviceOpen() Found device! VID: 0x0957 PID: 0x5f18 outPort: 2 inPort: 134
deviceOpen() Success calling libusb_claim_interface( ) handle: 0x88cd140
deviceOpen() Successful open! id: 0 handle: 0x88cd140
deviceWrite() Success writing 2 bytes
deviceRead() Success reading 146 bytes
deviceReadRegisters() Success!
deviceWrite() Success writing 2 bytes
deviceRead() Success reading 146 bytes
deviceReadRegisters() Success!
deviceWrite() Success writing 2 bytes
deviceRead() Success reading 146 bytes
deviceReadRegisters() Success!
deviceWrite() Success writing 2 bytes
deviceRead() Success reading 146 bytes
deviceReadRegisters() Success!
deviceWrite() Success writing 2 bytes
deviceRead() Success reading 146 bytes
deviceReadRegisters() Success!
deviceClose() Sucess calling libusb_release_interface( ) handle: 0x88cd140
deviceClose() Sucess calling libusb_close( ) handle: 0x88cd140
deviceClose() Success calling libusb_free_device_list( ) devs: 0x88cd170
runTest() Success!
main() attempting to runTest(), i: 1 <== this is the second call to runTest( )
runTest() Starting!
runTest() Number of devices: 1
deviceOpen() Number of alternate settings: 1
deviceOpen() Interface number: 0 number of endpoints: 5
deviceOpen() Descriptor type: 5 EP Address: 0x02 2
deviceOpen() Found outPort! outPort: 2 index: 0
deviceOpen() Descriptor type: 5 EP Address: 0x84 132
deviceOpen() Descriptor type: 5 EP Address: 0x86 134
deviceOpen() Found inPort! inPort: 134 index: 2
deviceOpen() Descriptor type: 5 EP Address: 0x01 1
deviceOpen() Descriptor type: 5 EP Address: 0x81 129
deviceOpen() Found device! VID: 0x0957 PID: 0x5f18 outPort: 2 inPort: 134
deviceOpen() Success calling libusb_claim_interface( ) handle: 0x88cd140
deviceOpen() Successful open! id: 0 handle: 0x88cd140
deviceWrite() Success writing 2 bytes
deviceRead() Problem reading 146 bytes, read only 0 bytes, inPort: 134 timeout: 2000 LIBUSB_ERROR_TIMEOUT
####### Test Program
// Test program demonstrating timeout problem following libusb_close()
//
// g++ -o libUsbTest libUsbTest.cpp -I /usr/local/include/libusb-1.0 -L /usr/local/lib -l usb-1.0
//
// sudo ./libUsbTest
#include <stdio.h>
#include <errno.h>
#include <signal.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <libusb.h>
// for test purposes, using global variable
libusb_device **devs = NULL;
libusb_device_handle *handle = NULL; // non-null when device is open
libusb_context *usbContext = NULL;
unsigned short outPort = 0;
unsigned short inPort = 0;
#define VID 0x957
#define PID 0x5f18
#define OUTPUT_ENDPOINT 2
#define INPUT_ENDPOINT 6
// Returns the number of devices matching our VID / PID
// A negative return value indicates a failure
int deviceCount( ) {
int count = 0;
if (devs)
{
// Note this automatically dereferrences each device
libusb_free_device_list(devs, 1);
devs = NULL;
}
ssize_t cnt;
cnt = libusb_get_device_list(usbContext, &devs);
if (cnt < 0 || devs == 0)
{
fprintf(stderr,"deviceCount() Failed libusb_get_device_list()!\n");
return -1;
}
// Walk through the device list and count those that match our VID / PID
libusb_device *dev = NULL;
count = 0;
int i = 0;
while ((dev = devs[i++]) != NULL) {
struct libusb_device_descriptor desc;
int r = libusb_get_device_descriptor(dev, &desc);
if (r < 0) {
fprintf(stderr, "deviceCount() Failed libusb_get_device_descriptor()!\n");
return -1;
}
if (desc.idVendor == VID && desc.idProduct == PID)
{
count++;
}
}
return count;
}
// Open the USB device. Only one device can be open at a time because of the above global variables
// Returns negative number for errors
int deviceOpen( int id) {
if (!usbContext) {
fprintf(stderr,"deviceOpen() Can't open! Null usbContext!\n");
return -1;
}
if (handle) {
fprintf(stderr,"deviceOpen() Device already open!\n");
return 0;
}
if (devs == 0) {
fprintf(stderr,"deviceOpen() Can't open! Null devs!\n");
return -1;
}
// Walk through the same device list and stop when we get to the desired sensor VID / PID
libusb_device *dev = NULL;
int count = 0;
bool keepGoing = true;
int i = 0;
while ((dev = devs[i++]) != NULL && keepGoing) {
struct libusb_device_descriptor desc;
int r = libusb_get_device_descriptor(dev, &desc);
if (r < 0) {
fprintf(stderr, "deviceOpen() Failed libusb_get_device_descriptor()!\n");
return -1;
}
if (desc.idVendor == VID && desc.idProduct == PID)
{
if (count == id) {
// Match!
keepGoing = false;
// Get the out and input ports
libusb_config_descriptor *config;
libusb_get_config_descriptor( dev, 0, &config);
const libusb_interface *inter;
const libusb_interface_descriptor *interdesc;
const libusb_endpoint_descriptor *epdesc;
for (int i=0; i<(int)config->bNumInterfaces; i++)
{
inter = &config->interface[i];
fprintf(stderr,"deviceOpen() Number of alternate settings: %d\n", inter->num_altsetting);
for (int j=0; j<inter->num_altsetting; j++)
{
interdesc = &inter->altsetting[j];
fprintf(stderr, "deviceOpen() Interface number: %d number of endpoints: %d\n",
interdesc->bInterfaceNumber, interdesc->bNumEndpoints);
for (int k=0; k<interdesc->bNumEndpoints; k++)
{
epdesc = &interdesc->endpoint[k];
fprintf(stderr, "deviceOpen() Descriptor type: %d EP Address: 0x%2.2x %d\n",
epdesc->bDescriptorType, epdesc->bEndpointAddress, epdesc->bEndpointAddress);
if (epdesc->bDescriptorType == LIBUSB_DT_ENDPOINT)
{
// Found an endpoint
uint8_t address = epdesc->bEndpointAddress & 0xf;
bool input = (epdesc->bEndpointAddress & 0x80) == LIBUSB_ENDPOINT_IN;
if ( address == OUTPUT_ENDPOINT && input == false)
{
outPort = epdesc->bEndpointAddress;
fprintf(stderr, "deviceOpen() Found outPort! outPort: %d index: %d\n", outPort, k);
}
if ( address == INPUT_ENDPOINT && input == true)
{
inPort = epdesc->bEndpointAddress;
fprintf(stderr, "deviceOpen() Found inPort! inPort: %d index: %d\n", inPort, k);
}
}
} // endpoints}
} // num_altsetting
} // interfaces
libusb_free_config_descriptor( config);
// Open device
fprintf(stderr, "deviceOpen() Found device! VID: 0x%4.4x PID: 0x%4.4x outPort: %d inPort: %d\n", VID, PID, outPort, inPort);
libusb_error errCode;
errCode = (libusb_error) libusb_open( dev, &handle);
if (errCode)
{
fprintf(stderr, "deviceOpen() Failed in libusb_open(), err: %s (%d)\n", libusb_error_name( errCode), errCode);
return -1;
}
// Verify not in use by kernel
if (libusb_kernel_driver_active( handle, 0))
{
// FIXME: we could call libusb_detach_kernel_driver( ) here
fprintf(stderr, "deviceOpen() Failed! kernel owns device!\n");
return -1;
}
// Claim the interface.
errCode = (libusb_error) libusb_claim_interface( handle, 0);
if (errCode)
{
fprintf(stderr, "deviceOpen() Failed in libusb_claim_interface(), err: %s (%d)\n", libusb_error_name( errCode), errCode);
return -1;
}
fprintf(stderr, "deviceOpen() Success calling libusb_claim_interface( ) handle: %p\n", handle);
} else {
count++;
}
} // desired VID / PID
} // Walk device list
fprintf(stderr, "deviceOpen() Successful open! id: %d handle: %p\n", id, handle);
return 0;
}
// Close the USB device
// Returns negative number for errors
int deviceClose() {
if (handle)
{
// Release the interface.
libusb_error errCode = (libusb_error) libusb_release_interface( handle, 0);
if (errCode)
{
fprintf(stderr, "deviceClose() Failed in libusb_release_interface(), err: %s (%d)\n", libusb_error_name( errCode), errCode);
return -1;
}
fprintf(stderr, "deviceClose() Sucess calling libusb_release_interface( ) handle: %p\n", handle);
libusb_close( handle);
fprintf(stderr, "deviceClose() Sucess calling libusb_close( ) handle: %p\n", handle);
handle = NULL;
}
if (devs)
{
// Note this automatically dereferrences each device
libusb_free_device_list(devs, 1);
fprintf(stderr, "deviceClose() Success calling libusb_free_device_list( ) devs: %p\n", devs);
devs = NULL;
}
outPort = 0;
inPort = 0;
return 0;
}
// Write to the device.
// Returns number of bytes written. Negative return value indicates an error
int deviceWrite( unsigned char *addr, int len){
if (!handle || !outPort) {
fprintf(stderr, "deviceWrite() No handle or outPort!\n");
return -1;
}
libusb_error errCode;
int transferred;
unsigned int timeout = 2000;
errCode = (libusb_error) libusb_bulk_transfer( handle, outPort, addr, len, &transferred, timeout);
if (errCode || transferred != len) {
fprintf(stderr, "deviceWrite() Problem writing %d bytes, wrote only %d bytes, outPort: %d timeout: %d %s\n",
len, transferred, outPort, timeout, libusb_error_name(errCode));
return -1;
}
fprintf(stderr, "deviceWrite() Success writing %ld bytes\n", transferred);
return transferred;
}
// Read from the device.
// Returns number of bytes read. Negative return value indicates an error
int deviceRead( unsigned char *addr, int len){
if (!handle || !outPort) {
fprintf(stderr, "deviceRead() No handle or outPort!\n");
return -1;
}
libusb_error errCode;
int transferred;
unsigned int timeout = 2000;
errCode = (libusb_error) libusb_bulk_transfer( handle, inPort, addr, len, &transferred, timeout);
if (errCode || (transferred != len)) {
fprintf(stderr, "deviceRead() Problem reading %ld bytes, read only %ld bytes, inPort: %d timeout: %d %s\n",
len, transferred, inPort, timeout, libusb_error_name(errCode));
return -1;
}
fprintf(stderr, "deviceRead() Success reading %ld bytes\n", transferred);
return transferred;
}
// Read the registers from the USB device
// Caution: this function is specific to my device!!!
// Returns negative number for errors
int deviceReadRegisters( ) {
// Write the command to the device so that it will return the registers
unsigned char wrBuf[2] = { 0x00, 0x10};
int rtn = deviceWrite( wrBuf, sizeof(wrBuf));
if (rtn < 0) return rtn;
// Read back 146 bytes
unsigned char rdBuf[146];
rtn = deviceRead( rdBuf, sizeof( rdBuf));
if (rtn < 0) return rtn;
// Just in case we add more logic later...
fprintf(stderr,"deviceReadRegisters() Success!\n");
return rtn;
}
// Execute the full test including initialization and cleanup
// Return negative number for error
int runTest( bool performLibUsbExit) {
int rtn;
fprintf(stderr,"runTest() Starting!\n");
// Initialize library
if (usbContext == NULL) {
int r = libusb_init( &usbContext);
if (r < 0)
{
fprintf(stderr,"runTest() failed in libusb_init()\n");
return -1;
}
fprintf(stderr,"runTest() Success calling libusb_init()\n");
}
// Make sure we have a device
int count = deviceCount();
if (count < 0) {
fprintf(stderr,"runTest() Error in deviceCount()\n");
return count;
}
if (count == 0) {
fprintf(stderr,"runTest() No devices found!\n");
return -1;
}
fprintf(stderr,"runTest() Number of devices: %d\n", count);
// Open the device
rtn = deviceOpen( 0);
if (rtn < 0) return rtn;
// Read the device registers a few times
for (int i=0; i<5; i++) {
rtn = deviceReadRegisters();
if (rtn < 0) return rtn;
}
// Close the device
rtn = deviceClose();
if (rtn < 0) return rtn;
// De-initialize the library
if (performLibUsbExit == true) {
libusb_exit(usbContext);
usbContext = NULL;
fprintf(stderr,"runTest() Success calling libusb_exit()\n");
}
fprintf(stderr,"runTest() Success!\n");
return 0;
}
int main( int argc, char *argv[]) {
int rtn;
// Run the test a few times
// The first test will pass. The second pass will timeout on the read.
int maxNum = 5;
for (int i=0; i<maxNum; i++) {
fprintf(stderr, "main() attempting to runTest(), i: %d\n", i);
// Only clean-up on the last call
rtn = runTest( i+1 == maxNum ? true : false);
if (rtn < 0) return rtn;
}
fprintf(stderr,"main() Success!\n");
return 0;
}

suspicious RCU usage?

I'm running custom Fedora 17 Kernel 3.3.0-0.rc5.git3.1.yfkm2.fc17.x86_64, and the warning was shown on dmesg:
[ 858.634304]
[ 858.634324] ===============================
[ 858.634350] [ INFO: suspicious RCU usage. ]
[ 858.634375] 3.3.0-0.rc5.git3.1.yfkm2.fc17.x86_64 #1 Not tainted
[ 858.634409] -------------------------------
[ 858.634435] kernel/pid.c:425 find_task_by_pid_ns() needs rcu_read_lock() protection!
[ 858.634478]
[ 858.634479] other info that might help us debug this:
[ 858.634480]
[ 858.634528]
[ 858.634529] rcu_scheduler_active = 1, debug_locks = 0
[ 858.634567] no locks held by monitor/10550.
[ 858.634591]
[ 858.634592] stack backtrace:
[ 858.634620] Pid: 10550, comm: monitor Not tainted 3.3.0-0.rc5.git3.1.yfkm2.fc17.x86_64 #1
[ 858.634666] Call Trace:
[ 858.634688] [<ffffffff810c8c55>] lockdep_rcu_suspicious+0xe5/0x100
[ 858.634727] [<ffffffff81086921>] find_task_by_pid_ns+0x81/0xa0
[ 858.634762] [<ffffffff81086962>] find_task_by_vpid+0x22/0x30
[ 858.634798] [<ffffffff8131ccd5>] yfkm2_is_pid_running+0x15/0x40
[ 858.634835] [<ffffffff8131ce54>] sys_yfkm2_monitor+0x14/0x80
[ 858.634870] [<ffffffff816a6ba9>] system_call_fastpath+0x16/0x1b
monitor is user application that call sys_yfkm2_monitor syscall passing a pid to it. The custom code worked as expected but I'm curious with the warning message shown on dmesg. What am I doing wrong?
The user application monitor.c:
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#define SYS_yfkm2_monitor __NR_yfkm2_monitor
#define SYS_yfkm2_notifyme __NR_yfkm2_notifyme
int main (int argc, char *argv[])
{
if (argc < 2) {
printf("Error. Use %s <PID>\n", argv[0]);
return 1;
}
pid_t pid = atoi(argv[1]);
long ret;
ret = syscall(SYS_yfkm2_monitor, pid);
if (ret == 0){
printf("Sucess on adding %d!\n", pid);
return 0;
} else {
printf("Failure! Is %s a valid PID?\n", argv[1]);
return 1;
}
}
The Kernel code:
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/kthread.h>
#define YFKM2_KT_TIMEOUT (1*HZ) /* 1 second */
struct yfkm2 {
pid_t monitor; /* PID to monitor */
pid_t notifyme; /* PID to notify */
struct list_head list; /* Linked List struct */
};
/* How many Kernel Threads are running? */
atomic_t yfkm2_kthread_run_count = ATOMIC_INIT(0);
/* Define and initialize yfkm2_(linked)list */
LIST_HEAD(yfkm2_list);
/* Define and initialize yfkm2_(read&write)lock */
DEFINE_RWLOCK(yfkm2_lock);
/*
* yfkm2_is_pid_running(pid_t pid)
*
* Check if pid is running
*
* return 0 if pid is running
* return 1 if pid is not running
*/
int yfkm2_is_pid_running(pid_t pid)
{
struct task_struct *q;
q = find_task_by_vpid(pid);
if (q != NULL && q->pid == pid)
return 0;
return 1;
}
/*
* yfkm2_kill(pid_t pid)
*
* Kills pid
*
* return 0 if pid was running and send SIGKILL to pid
* return 1 if pid is not running
*/
int yfkm2_kill(pid_t pid)
{
struct task_struct *q;
q = find_task_by_vpid(pid);
if (q != NULL) {
force_sig(SIGKILL, q);
return 0;
}
return 1;
}
/*
* int yfkm2_kthread(void *data)
*
* The Kernel Thread
*
* Traverse the yfkm2_list looking for yfkm2->notifyme that are not 0.
* If any found, check if correspondent yfkm2->monitor is still running. If not
* kill yfkm2->notifyme. After traversing the list, check if the list is empty.
* If so return 0. If not sleep one second and start again.
*
* return 0 if yfkm2_list is empty
* should never return 1
*/
int yfkm2_kthread(void *data) /* data is NEVER used */
{
struct yfkm2 *yfkm2_tmp, *yfkm2_tmp2;
bool empty;
while (true) {
/* Needs write protection due possible item removal from list */
write_lock(&yfkm2_lock); /* Write lock */
list_for_each_entry_safe(yfkm2_tmp, yfkm2_tmp2,
&yfkm2_list, list) {
if (yfkm2_tmp->notifyme != 0) {
if (yfkm2_is_pid_running(yfkm2_tmp->monitor) != 0) {
yfkm2_kill(yfkm2_tmp->notifyme);
list_del(&yfkm2_tmp->list);
kfree(yfkm2_tmp);
}
}
}
write_unlock(&yfkm2_lock); /* Write unlock */
read_lock(&yfkm2_lock); /* Read lock */
empty = list_empty(&yfkm2_list);
read_unlock(&yfkm2_lock); /* Read unlock */
if (empty) {
/* The counter is increased at sys_yfkm2_notifyme()
* Before exit, decrease atomic run counter */
atomic_dec(&yfkm2_kthread_run_count);
return 0;
}
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(YFKM2_KT_TIMEOUT);
}
/* Before exit, decrease atomic run counter */
atomic_dec(&yfkm2_kthread_run_count);
return 1;
}
/*
* asmlinkage long sys_yfkm2_monitor(pid_t monitor)
*
* The system call that check if monitor correspond to a running pid and stores
* monitor at yfkm2_list->monitor
*
* return 0 if pid is running
* return 1 if pid is not running
*/
asmlinkage long sys_yfkm2_monitor(pid_t monitor)
{
struct yfkm2 *yfkm2_tmp;
if (yfkm2_is_pid_running(monitor) == 0) {
yfkm2_tmp = kmalloc(sizeof(*yfkm2_tmp), GFP_KERNEL);
yfkm2_tmp->monitor = monitor;
yfkm2_tmp->notifyme = 0;
write_lock(&yfkm2_lock);
list_add(&yfkm2_tmp->list, &yfkm2_list);
write_unlock(&yfkm2_lock);
return 0;
}
return 1;
}
/*
* asmlinkage long sys_yfkm2_notifyme(pid_t monitor, pid_t notifyme)
*
* The system call that looks for monitor at yfkm2_list->monitor. If found
* store notifyme at yfkm2_list->notifyme. It also starts the kernel thread
* if it is not running.
*
* return 0 if pid is running
* return 1 if pid is not running
*/
asmlinkage long sys_yfkm2_notifyme(pid_t monitor, pid_t notifyme)
{
struct yfkm2 *yfkm2_tmp;
bool found_monitored_pid = false;
write_lock(&yfkm2_lock); /* Write lock */
list_for_each_entry(yfkm2_tmp, &yfkm2_list, list) {
if (yfkm2_tmp->monitor == monitor) {
yfkm2_tmp->notifyme = notifyme;
found_monitored_pid = true;
break;
}
}
write_unlock(&yfkm2_lock); /* Write unlock */
if (found_monitored_pid) {
if (atomic_read(&yfkm2_kthread_run_count) < 1) {
/* The counter is decreased at yfkm2_kthread()
* Before start, increase atomic run counter */
atomic_inc(&yfkm2_kthread_run_count);
kthread_run(&yfkm2_kthread, NULL, "yfkm2_kthread");
}
return 0;
} else {
return 1;
}
}
You are not performing correct locking on the task list. For example, your yfkm2_kill() function should be:
int yfkm2_kill(pid_t pid)
{
struct task_struct *q;
rcu_read_lock();
q = find_task_by_vpid(pid);
if (q)
get_task_struct(q);
rcu_read_unlock();
if (q == NULL)
return 1;
force_sig(SIGKILL, q);
put_task_struct(q);
return 0;
}
...but your whole design appears to be severely racy. For example, one of the ->monitor tasks could exit and be replaced with a new, different task with the same PID before your kernel thread notices.
You seem to be running code without the required locks.
Such things tend to work, except that they crash once in a while (possibly a long while).
I don't know these functions so much, but it seems like find_task_by_vpid should be called under some RCU lock (probably the one that protects the process list), in read mode.