Click to See Complete Forum and Search --> : What is the maximum number of threads per process in Windows


raindog
May 2nd, 2005, 11:11 AM
the subject says it all

SuperKoko
May 2nd, 2005, 12:26 PM
There is no official limit.
The real limit is on the total number of threads, not on the number of threads per process.
The limit is defined by global system resources (like max available ram), or other limits specific to an OS, and undocumented.
You should not have to worry about such a limitation.

With my computer (K6-2 333 with 192 MB of RAM), I can create about 1500 threads for the whole system.
But newer computers with Win XP can certainly have many, many more threads.

raindog
May 2nd, 2005, 12:30 PM
I have a problem with my binary tree that consists of 1024 threads... As long as I keep it under 1000 it works fine; once I create more than 1000, the program crashes. I also tried a simple multithreaded program that only creates 1000 threads and keeps them working at the same time, and the same thing happens.

Arjay
May 2nd, 2005, 03:29 PM
I don't know anything about your design, but 1000+ threads is excessive. You may consider using thread pooling. Windows has a handy one for you with QueueUserWorkItem(...).

Arjay

raindog
May 2nd, 2005, 03:35 PM
I don't know anything about your design, but 1000+ threads is excessive. You may consider using thread pooling. Windows has a handy one for you with QueueUserWorkItem(...).

Arjay

i use visual studio c++ and i can't find such thing as QueueUserWorkItem()

Arjay
May 2nd, 2005, 03:47 PM
Here you go: QueueUserWorkItem (http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dllproc/base/queueuserworkitem.asp).


Arjay

raindog
May 2nd, 2005, 03:51 PM
could you explain a bit more how this pooling works...I have to create a binary tree in which each node is a thread. That binary tree consists of about 1024 elements (calculating an integral). Once I build the tree the lowest children calculate a certain function and they send the result up to their parent who sums the results from its two children and sends it to its parent (each child was created by its parent).

Arjay
May 2nd, 2005, 03:58 PM
In your current code, you are creating a thread that runs a calculation, right? The difference in using a thread pool is that instead of each node creating its own thread, you would pass a function to QueueUserWorkItem and QueueUserWorkItem would create the necessary threads for you. If you post your thread proc code, I will show you how to use the api.

Arjay

raindog
May 2nd, 2005, 04:06 PM
i pushed everything in one file for simplicity


#include <windows.h>
#include <stdio.h>
#include <cassert>
#include <conio.h>
#include <math.h>

// Work description handed to one Integral thread.
struct ThreadParam {
double lUpper;  // upper bound of the interval this thread covers
double lLower;  // lower bound of the interval this thread covers
double lDeltaX; // intervals wider than this are split between two child threads

};

/*
 * Thread procedure: integrates over [pParam->lLower, pParam->lUpper].
 *
 * An interval wider than lDeltaX is halved and each half is handed to a child
 * thread running this same procedure; the two partial results are summed.
 * A narrower interval is evaluated directly.
 *
 * param  - ThreadParam* describing the interval; it stays owned by the caller.
 * return - becomes the thread exit code: the address of a float allocated
 *          with new that holds the result. The receiver must delete it.
 *
 * NOTE: squeezing a pointer into a DWORD only works in a 32-bit build; it is
 * kept because callers read the result through GetExitCodeThread.
 */
DWORD WINAPI Integral(LPVOID param){

    ThreadParam* pParam = (ThreadParam*) param;
    float lSizeOfMyInterval = pParam->lUpper - pParam->lLower;
    float* pResultToSend = new float;

    if(lSizeOfMyInterval > pParam->lDeltaX){

        // The child parameters can live on this stack frame because this
        // thread does not leave the block before both children are done.
        ThreadParam childParams[2];
        childParams[0].lLower = pParam->lLower;
        childParams[0].lUpper = pParam->lLower + (lSizeOfMyInterval)/2;
        childParams[0].lDeltaX = pParam->lDeltaX;
        childParams[1].lLower = pParam->lLower + (lSizeOfMyInterval)/2;
        childParams[1].lUpper = pParam->lUpper;
        childParams[1].lDeltaX = pParam->lDeltaX;

        HANDLE hChildren[2];
        DWORD childResults[2] = { 0, 0 };
        int i;

        for(i = 0; i < 2; i++){
            hChildren[i] = CreateThread(NULL, 0, Integral, (LPVOID)&childParams[i], 0, 0);
            if(hChildren[i] == NULL){
                // The system refused another thread: compute this half in
                // the current thread instead of reading a garbage result.
                childResults[i] = Integral((LPVOID)&childParams[i]);
            }
        }

        for(i = 0; i < 2; i++){
            if(hChildren[i] != NULL){
                WaitForSingleObject(hChildren[i], INFINITE);
                GetExitCodeThread(hChildren[i], &childResults[i]);
                // A finished thread object stays allocated until its handle is closed.
                CloseHandle(hChildren[i]);
            }
        }

        float lSum = 0;
        for(i = 0; i < 2; i++){
            float* pResultFromChild = (float *) childResults[i];
            if(pResultFromChild != NULL){   // NULL only if the exit code could not be read
                lSum = lSum + *pResultFromChild;
                delete pResultFromChild;
            }
        }
        *pResultToSend = lSum;
    }
    else{
        *pResultToSend=(exp(-0.5 * pParam->lDeltaX) * (cos(pParam->lLower) * lSizeOfMyInterval) +
                        exp(-0.5*pParam->lDeltaX) * (cos(pParam->lUpper) * lSizeOfMyInterval));
    }

    // Returning from a thread procedure sets the exit code just like
    // ExitThread, and it also lets this function be called directly.
    return (DWORD) pResultToSend;
}
int main(int argc, char* argv[])
{

HANDLE* ThreadHandles = new HANDLE[1];

DWORD code1;
float* transfer;

ThreadParam* pParam = new ThreadParam;
pParam->lUpper = 2*3.14;
pParam->lLower = 0;
pParam->lDeltaX = 0.015;


ThreadHandles[0] = CreateThread(NULL, 0, Integral, (LPVOID)pParam, 0, 0);


WaitForSingleObject(ThreadHandles[0], INFINITE);

GetExitCodeThread(ThreadHandles[0],&code1);
transfer=(float*) code1;
printf(" %f ",*transfer);
printf("press any key");
getch();

delete [] ThreadHandles;
return 0;
}

raindog
May 2nd, 2005, 04:10 PM
the problem occurs when i put the deltax to 0.01 as the assignment requires

Arjay
May 2nd, 2005, 06:47 PM
I took the liberty of changing the code slightly:

Rather than using the thread return param to pass the result, the code now passes the result in the ThreadParam structure
Since QueueUserWorkItem (QUWI) doesn't return a thread handle to wait on, we need to develop another method to signal when the work has been completed. I added a hEvent param to the ThreadParam structure to accomplish this.
For debug purposes, I added a uCount param to the ThreadParam structure. This count is a global thread counter that is inc when the thread proc is entered and dec when exited.
I removed all the 'new' heap allocations and replaced them with stack based allocations. This simplifies the code and makes cleanup easier. Usually you want to use heap based allocations when creating a thread to ensure the params don't go out of scope while the thread is executing. In this case, we ensure that we aren't going out of scope prematurely because we are waiting on both child events.
This code seems to work fine with deltaX at 0.01. On my machine the max threads seem to peak at about 400 (which is better than the default QUWI thread pool count and much better than the nt max thread count).

Arjay


#ifndef _WIN32_WINNT
#define _WIN32_WINNT 0x0500
#endif
#include <tchar.h>
#include <windows.h>
#include <stdio.h>
#include <cassert>
#include <conio.h>
#include <math.h>

// Work item description shared between a node and the work item it queues.
// Field order matters: the code fills this struct with positional brace
// initializers.
struct ThreadParam
{
double lUpper; // Upper bound of the interval to integrate
double lLower; // Lower bound of the interval to integrate
double lDeltaX; // Intervals wider than this are split in two
HANDLE hEvent; // Handle to wait on
double dResultToSend; // Returns the result
UINT* puCount; // Debug Only - tracks thread count
};

/*
 * Thread-pool work item: integrates over [pParam->lLower, pParam->lUpper].
 *
 * An interval wider than lDeltaX is halved; each half is queued as another
 * work item and this function blocks until both have signalled their events.
 * A narrower interval is evaluated directly.
 *
 * param  - ThreadParam*; the result is written to dResultToSend and hEvent
 *          (when not NULL) is signalled once the result is in place.
 * return - always 0.
 *
 * NOTE(review): every non-leaf item blocks while its children are pending,
 * so the design presumably stalls if the pool cannot supply enough threads
 * for the whole tree - confirm against the pool's thread limit.
 */
DWORD WINAPI Integral(LPVOID param)
{
    ThreadParam* pParam = (ThreadParam*) param;

    // Debug spew. Several work items share this counter, so it must be
    // updated atomically. A plain string literal is used because printf
    // takes narrow strings even in UNICODE builds.
    LONG lActive = InterlockedIncrement( (LONG*) pParam->puCount );
    printf( "IN -- Thread Count: %ld\n", lActive );

    double dSizeOfMyInterval = pParam->lUpper - pParam->lLower;

    if(dSizeOfMyInterval > pParam->lDeltaX)
    {
        double dMiddle = pParam->lLower + (dSizeOfMyInterval) / 2;

        // Stack storage is safe: this function does not leave the block
        // until both children have finished.
        ThreadParam aChildren[2] =
        {
            { dMiddle,
              pParam->lLower,
              pParam->lDeltaX,
              ::CreateEvent( NULL, FALSE, FALSE, NULL ),
              0.0,
              pParam->puCount },
            { pParam->lUpper,
              dMiddle,
              pParam->lDeltaX,
              ::CreateEvent( NULL, FALSE, FALSE, NULL ),
              0.0,
              pParam->puCount }
        };
        BOOL abQueued[2];
        int i;

        for (i = 0; i < 2; i++)
        {
            // Queue each work item using the Windows thread pooling mechanism
            abQueued[i] = aChildren[i].hEvent != NULL
                && ::QueueUserWorkItem( Integral, &aChildren[i], WT_EXECUTELONGFUNCTION );

            if (!abQueued[i])
            {
                // No event or the pool refused the item: waiting would
                // never end, so compute this half right here.
                Integral( &aChildren[i] );
            }
        }

        for (i = 0; i < 2; i++)
        {
            if (abQueued[i])
            {
                WaitForSingleObject( aChildren[i].hEvent, INFINITE );
            }
            if (aChildren[i].hEvent != NULL)
            {
                ::CloseHandle( aChildren[i].hEvent );
            }
        }

        pParam->dResultToSend = aChildren[0].dResultToSend + aChildren[1].dResultToSend;
    }
    else
    {
        pParam->dResultToSend
            = (exp(-0.5 * pParam->lDeltaX)
                * (cos(pParam->lLower)
                * dSizeOfMyInterval)
            + exp(-0.5*pParam->lDeltaX)
                * (cos(pParam->lUpper)
                * dSizeOfMyInterval));
    }

    // Debug spew -- decrements on exit
    lActive = InterlockedDecrement( (LONG*) pParam->puCount );
    printf( "OUT -- Thread Count: %ld\n", lActive );

    // Signals the waiting parent. This must be the last access to pParam:
    // once signalled, the parent may return and release the structure.
    if (pParam->hEvent != NULL)
    {
        SetEvent( pParam->hEvent );
    }

    return 0;
}

int _tmain(int argc, _TCHAR* argv[])
{
UINT uCount = 0;
ThreadParam tp = { 2 * 3.14,
0,
0.01, // 0.015
::CreateEvent( NULL, FALSE, FALSE, NULL ),
0.0,
&uCount };

// Queue the first work item using the Windows thread pooling mechanism
::QueueUserWorkItem( Integral, &tp, WT_EXECUTELONGFUNCTION );

// Wait for the event to signal
WaitForSingleObject( tp.hEvent, INFINITE);

// Cleanup the event handle
::CloseHandle( tp.hEvent );

printf(" %f ", tp.dResultToSend);
printf("press any key");

getch();

return 0;
}

raindog
May 3rd, 2005, 02:50 AM
thanks but i keep getting errors at

error C2039: 'QueueUserWorkItem' : is not a member of '`global namespace''
error C2065: 'QueueUserWorkItem' : undeclared identifier
error C2065: 'WT_EXECUTELONGFUNCTION' : undeclared identifier

Arjay
May 3rd, 2005, 03:03 AM
thanks but i keep getting errors at

error C2039: 'QueueUserWorkItem' : is not a member of '`global namespace''
error C2065: 'QueueUserWorkItem' : undeclared identifier
error C2065: 'WT_EXECUTELONGFUNCTION' : undeclared identifier

Did you add the following to the top of your #includes?


#ifndef _WIN32_WINNT
#define _WIN32_WINNT 0x0500
#endif


See the attachment for the complete solution.

Arjay

raindog
May 3rd, 2005, 03:51 AM
Did you add the following to the top of your #includes?
See the attachment for the complete solution.
Arjay
i did and still won't work ... i use visual studio 6 c++ and i have to create it in that application... what did you use to create the project you sent me in the zip?

raindog
May 3rd, 2005, 04:00 AM
i'm a bit new to all this what does ::CloseHandle() instead of CloseHandle() mean?
and why did you write _tmain instead of main

raindog
May 3rd, 2005, 04:13 AM
Here you go: QueueUserWorkItem (http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dllproc/base/queueuserworkitem.asp).


Arjay the other funny thing is that i can't find QueueUserWorkItem in my MSDN lib on my computer i can find it only in the internet version

Arjay
May 3rd, 2005, 05:20 AM
the other funny thing is that i can't find QueueUserWorkItem in my MSDN lib on my computer i can find it only in the internet version

Since you are using VC6, you will probably need to install the Platform SDK (http://www.microsoft.com/downloads/details.aspx?FamilyId=A55B6B43-E24F-4EA3-A93E-40C0EC4F68E5&displaylang=en). This includes features in Windows that weren't available when VC6 was released.

Arjay

raindog
May 3rd, 2005, 05:30 AM
i'll dl it and let you know but it will take me about 10h because i have a 128Kb/s connection :sick:

raindog
May 3rd, 2005, 07:18 AM
could you please explain to me the questions i've asked at the end of the 1. page of this thread

thanx in advance

Arjay
May 3rd, 2005, 11:52 AM
i'm a bit new to all this what does ::CloseHandle() instead of CloseHandle() mean?
and why did you write _tmain instead of main

The '::' is the scope resolution operator, which doesn't really matter much in this C-style program. In C++, if I were in a window class that had a member FindWindow and I wanted to call the native FindWindow API (instead of my FindWindow member), I would call ::FindWindow. This tells the compiler to look outside the class scope for the function. In this sample it really doesn't matter, since we don't have a class with a CloseHandle method anyway - I really did it out of habit. _tmain is the default main declaration given when creating a console project in Visual Studio .NET 2003. It passes the correct ANSI or UNICODE strings to main depending on the compiler settings (i.e. whether the app is compiled as UNICODE).

Arjay

raindog
May 3rd, 2005, 12:05 PM
thank you very very much:thumb:

raindog
May 3rd, 2005, 04:25 PM
Q What version of Microsoft Visual Studio is required to build the Platform SDK samples?
To build the Platform SDK samples, you need either Microsoft Visual Studio .NET or Microsoft Visual Studio 2003. For more information, see Microsoft Visual Studio

I've installed it and registered it with Visual Studio, and it still won't work... It looks like there is no hope since I use Visual Studio 6. I'll have to write my own thread pool :sick:

Arjay
May 3rd, 2005, 04:59 PM
i've installed it i've registered it with visual studio and it still won't work ... looks like there is no hope since i use visual studio 6. i'll have to write my own thread pool :sick:

Did you make sure to change the include and lib search order so that the Platform SDK files are used before the VC6 ones? Search VC help for 'VC++ Directories.' If I recall correctly, it's in the "Tools\Options" menu. There should be no reason for QueueUserWorkItem not to work in VC6 with the current SDK installed.

Arjay

raindog
May 3rd, 2005, 05:10 PM
Did you make sure to change the include and lib search order so that the platform sdk files are used before the VC6 ones? Search VC help for 'VC++ Directories.' If I recall correctly it's in the "Tools\Options" menu. There should be no reason for QueueUserWorkItem not to work in VC6 with the current SDK installed.

Arjay
now it passes the compile but there is a problem the last thing that the prog writes is IN thread count 500 but it never writes out the result

Arjay
May 3rd, 2005, 05:18 PM
now it passes the compile but there is a problem the last thing that the prog writes is IN thread count 500 but it never writes out the result

Did you import threadpool.cpp into the VC6 project exactly as it is? Have you set a breakpoint at the start of the Integral function and debugged it?

raindog
May 3rd, 2005, 05:38 PM
Did you import the threadpool.cpp exactly into the VC6 project? Have you set a breakpoint on the start of the Integral function and debug it? yes and no thread passes by the WaitForMultipleObjects(sizeof(aHandles)/sizeof(HANDLE),
&aHandles[0],
TRUE,
INFINITE);

i think that is because even thou i use this method i still need 1000 threads to be created and the pool allows only 500... i think this because when i raise the deltax to say 1 it works fine

Arjay
May 3rd, 2005, 05:45 PM
yes and no thread passes by the WaitForMultipleObjects(sizeof(aHandles)/sizeof(HANDLE),
&aHandles[0],
TRUE,
INFINITE);

i think that is because even thou i use this method i still need 1000 threads to be created and the pool allows only 500... i think this because when i raise the deltax to say 1 it works fine

I tried both the 0.015 and 0.010 values and it worked - do these values work for you?

raindog
May 3rd, 2005, 05:50 PM
I tried both the 0.015 and 0.010 values and it worked - do these values work for you?
no they both stop at in 500 threads could you send me the exe files of those which work

raindog
May 3rd, 2005, 05:58 PM
this is how the program copiled on my computer with 0.01 looks like

raindog
May 3rd, 2005, 06:08 PM
What operating system do you use? I use XP SP2; if you use some other one, maybe that is why it works for you. You may also be seeing an average of 400 threads only because the rest of the screen flies by and you see just the exiting stage, but to reach the lowest children you still need to create more than 1000 threads when using 0.01... just a thought :)

raindog
May 3rd, 2005, 06:59 PM
I've solved the problem with my old design by inserting a delay in the creation of the threads. This way some calculating threads finish their work before the rest of the tree is created, which frees up part of the thread count. The only change I made is:

ThreadParam* pParam1 = new ThreadParam; //parameters of the first thread
pParam1->lLower = pParam->lLower;
pParam1->lUpper = pParam->lLower + (lSizeOfMyInterval)/2;
pParam1->lDeltaX = pParam->lDeltaX;
ThreadHandles[0] = CreateThread(NULL, 0, Integral, (LPVOID)pParam1, 0, 0);
Sleep(100); //HERE: delay before the second thread is created
ThreadParam* pParam2 = new ThreadParam; //parameters of the second thread
pParam2->lLower = pParam->lLower + (lSizeOfMyInterval)/2;
pParam2->lUpper = pParam->lUpper;
pParam2->lDeltaX = pParam->lDeltaX;
ThreadHandles[1] = CreateThread(NULL, 0, Integral, (LPVOID)pParam2, 0, 0);

Arjay
May 4th, 2005, 11:11 AM
what operating sistem do u use ... i use XP SP2 ... if you use some other maybe that is why it works with you ... and you see an average of 400 threads because the rest of the screen flies by and you only see the exiting stage but to reach the lowest children you still need to create more than a 1000 threads when using 0.01 .... just a thought :)

I'm running on Win2K. Both your program and mine work. The programs only create a maximum of 400 - 550 threads. How do I know this? Because I changed the buffer size in the console window and captured the entire output - see the attached file. So we are well under 1000 threads.

raindog
May 4th, 2005, 11:20 AM
I'm running on Win2K. Both your program and mine work. The programs only create a maximum of 400 - 550 threads. How do I know this? Because I change the buffer size in the console window and capture the entire output - see the attached file. So we are well under 1000 threads.
then it must be to XP because the file i've sent you doesn't work on my comp it hangs on 500 threads

Arjay
May 4th, 2005, 11:21 AM
i've solved the problem using my old design by inserting a delay in creation of the treads this way i get that some calculating threas finish the work before the rest of the tree is created thus freeing up the used treads count the only chage i made is

ThreadParam* pParam1 = new ThreadParam; //parameters of the first thread
pParam1->lLower = pParam->lLower;
pParam1->lUpper = pParam->lLower + (lSizeOfMyInterval)/2;
pParam1->lDeltaX = pParam->lDeltaX;
ThreadHandles[0] = CreateThread(NULL, 0, Integral, (LPVOID)pParam1, 0, 0);
Sleep(100); //HERE: delay before the second thread is created
ThreadParam* pParam2 = new ThreadParam; //parameters of the second thread
pParam2->lLower = pParam->lLower + (lSizeOfMyInterval)/2;
pParam2->lUpper = pParam->lUpper;
pParam2->lDeltaX = pParam->lDeltaX;
ThreadHandles[1] = CreateThread(NULL, 0, Integral, (LPVOID)pParam2, 0, 0);
Really, the fundamental problem is a design that uses so many threads. You can raise the max thread count (have you looked into WT_SET_MAX_THREADPOOL_THREAD?), but the underlying problem is that this design requires too many threads. I understand that this is a classroom exercise and that you need to do it this way, but this approach wouldn't be of benefit in the real world. I say this because, unless you are on a multiprocessor machine (and I mean 16+ processors), you are going to be burning cycles on context switches and thus wasting time. Btw, the sleep that you have added above may work on your machine, but it isn't really a true fix, because on another machine that sleep value probably won't get the job done. In addition, if you were to use a sleep, you might consider using it with the QueueUserWorkItem approach - because with the QUWI approach you at least have the possibility of using fewer threads than total nodes.

Arjay

Arjay
May 4th, 2005, 11:22 AM
then it must be to XP because the file i've sent you doesn't work on my comp it hangs on 500 threads

XP probably has a reason for limiting the thread count; can you figure out what that might be?

Arjay

akraus1
May 4th, 2005, 02:51 PM
Each thread has a default stack size of 1 MB. If you create a thread, 1 MB is reserved in the address space of your process. It is not committed yet (so your stack does not really allocate 1 MB of physical RAM until the stack is needed). In Windows you have a 2 GB address space limit. This allows you to create ca. 2000 threads with the default stack size in your process. There is no real limit as far as I know, but you will soon hit the physical address space problem.
You can go to 3 GB with a special switch during startup of Windows, but this problem will be solved in XP64 once and for all.
Why? Because the address space is much bigger than the storage capacity of the human brain, which is approximately 1 PB (1 petabyte, 10^15 bytes). But you have over 2^64 = 10^19 bytes, which means you would have 10,000 times more storage capacity than the human brain. This should be enough for everyone ;-)

raindog
May 10th, 2005, 04:43 PM
I gave it a few days' rest and finally came up with an idea. This way the thread count will be equal to the depth of the binary tree :) and you can make deltaX as small as you want.




#include <windows.h>
#include <stdio.h>
#include <cassert>
#include <conio.h>
#include <math.h>

// Work description handed to one Integral thread; the thread writes its
// answer back into dResultToSend.
struct ThreadParam {
double dUpper; // Upper bound of the interval to integrate
double dLower; // Lower bound of the interval to integrate
double dDeltaX; // Intervals wider than this are split in two
double dResultToSend; // Returns the result
UINT* puCount; // Debug Only - tracks thread count
};

// Guards the leaf computation in Integral; main initializes it before the
// first thread starts and deletes it after that thread has finished.
CRITICAL_SECTION FunctionAccess;

/*
 * Area of one rectangle of the sum: f(dPoint) * dSizeOfInterval,
 * where f(x) = exp(-0.5 * x) * cos(x).
 *
 * dPoint          - x value at which f is sampled
 * dSizeOfInterval - width of the rectangle
 */
double CalculateFunction(double dPoint, double dSizeOfInterval){
    const double dDecay = exp(-0.5 * dPoint);
    const double dWave = cos(dPoint);

    return dDecay * dWave * dSizeOfInterval;
}

DWORD WINAPI Integral(LPVOID param)
{

static double dSizeOfMyInterval = 0.0;
ThreadParam* pParam = (ThreadParam*) param;

// increments thread count when enters fn; decrements on exit
(*(pParam->puCount))++;
printf("IN -- Thread Count: %d\n", *(pParam->puCount) );

dSizeOfMyInterval = pParam->dUpper - pParam->dLower;


if(dSizeOfMyInterval > pParam->dDeltaX)
{
ThreadParam* pParam1 = new ThreadParam;
pParam1->dLower = pParam->dLower;
pParam1->dUpper = pParam->dLower + (dSizeOfMyInterval) / 2;
pParam1->dDeltaX = pParam->dDeltaX;
pParam1->dResultToSend = 0.0;
pParam1->puCount = pParam->puCount;

ThreadParam* pParam2 = new ThreadParam;
pParam2->dLower = pParam->dLower + (dSizeOfMyInterval) / 2;
pParam2->dUpper = pParam->dUpper;
pParam2->dDeltaX = pParam->dDeltaX;
pParam2->dResultToSend = 0.0;
pParam2->puCount = pParam->puCount;


HANDLE* ThreadHandles = new HANDLE[2];


ThreadHandles[0]=CreateThread(NULL, 0, Integral, (LPVOID)pParam1, 0, 0);
if (ThreadHandles[0]==NULL){
(*(pParam->puCount))--;
printf("OUT IR-- Thread Count: %d\n", *(pParam->puCount) );
return 0;
}
//Sleep(5);
ThreadHandles[1]=CreateThread(NULL, 0, Integral, (LPVOID)pParam2, CREATE_SUSPENDED, 0);
if (ThreadHandles[1]==NULL){
(*(pParam->puCount))--;
printf("OUT IR -- Thread Count: %d\n", *(pParam->puCount) );
return 0;
}
WaitForSingleObject(ThreadHandles[0],INFINITE);
ResumeThread(ThreadHandles[1]);
WaitForSingleObject(ThreadHandles[1],INFINITE);

//WaitForMultipleObjects(2,ThreadHandles,TRUE,INFINITE);

CloseHandle(ThreadHandles[0]);
CloseHandle(ThreadHandles[1]);

pParam->dResultToSend = pParam1->dResultToSend + pParam2->dResultToSend; //sending result to parent

}
else
{
//calcualting the function in the specific point
EnterCriticalSection(&FunctionAccess);

pParam->dResultToSend=CalculateFunction(pParam->dLower+dSizeOfMyInterval/2,dSizeOfMyInterval);

LeaveCriticalSection(&FunctionAccess);
}

// decrements on exit
(*(pParam->puCount))--;
printf("OUT -- Thread Count: %d\n", *(pParam->puCount) );




return 0;
}

int main(int argc, CHAR* argv[])
{
DWORD dStartTime;
DWORD dEndTime;
HANDLE* ThreadHandles = new HANDLE[1];
UINT uCount = 0;
ThreadParam* pParam = new ThreadParam;
pParam->dLower = 0;
pParam->dUpper = 2 * 3.141592654;
pParam->dDeltaX = 0.00001;
pParam->dResultToSend = 0.0;
pParam->puCount = &uCount;
dStartTime=GetTickCount();
InitializeCriticalSection(&FunctionAccess);

ThreadHandles[0]=CreateThread(NULL, 0, Integral, (LPVOID)pParam, 0, 0);


WaitForSingleObject( ThreadHandles[0], INFINITE);

CloseHandle( ThreadHandles[0]);
DeleteCriticalSection(&FunctionAccess);

printf(" %f ", pParam->dResultToSend);
dEndTime=GetTickCount();
printf("\nElapsed time in miliseconds %d ",dEndTime-dStartTime);
printf("\npress any key");
getch();

return 0;
}