Created
November 29, 2010 22:06
-
-
Save jamesladd/720718 to your computer and use it in GitHub Desktop.
x86 Assembler - win32 overlapped non-blocking io.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; ----------------------------------------------------------------------------- | |
; main.asm - FASt Server - Main FASt Server entry point and loader. | |
; http://www.jamesladdcode.com/ | |
; | |
; ----------------------------------------------------------------------------- | |
; | |
.686 ; create 32 bit code | |
.model flat, stdcall ; 32 bit memory model | |
option casemap :none ; case sensitive | |
; ----------------------------------------------------------------------------- | |
; | |
include \masm32\include\windows.inc | |
include \masm32\include\kernel32.inc | |
includelib \masm32\lib\kernel32.lib | |
include \masm32\include\ws2_32.inc | |
include \masm32\include\wsock32.inc | |
includelib \masm32\lib\ws2_32.lib | |
includelib \masm32\lib\mswsock.lib ; winsock2 ms specific library. | |
include \masm32\include\masm32.inc | |
includelib \masm32\lib\masm32.lib | |
; ----------------------------------------------------------------------------- | |
; | |
; Winsock 2 names used by this file.
WSA_FLAG_OVERLAPPED     EQU     01h                 ; last argument of the WSASocket call
WSA_IO_PENDING          EQU     ERROR_IO_PENDING    ; compared against WSAGetLastError

; Buffer descriptor passed to WSARecv / WSASend.
WSABUF                  STRUCT
    len                 DWORD   ?                   ; buffer length in bytes
    buf                 PBYTE   ?                   ; pointer to the buffer
WSABUF                  ENDS

LPWSABUF                TYPEDEF PTR WSABUF
; Server-wide state. A single instance is declared in .data and handed to
; fast_server_run and to every IO worker thread by pointer.
FASTSERVER              STRUCT
    wsadata             WSADATA     <>              ; filled in by WSAStartup
    ListenSocket        SOCKET      ?               ; socket that accepts client connections
    ListenAddr          sockaddr_in <>              ; local address/port the listen socket is bound to
    hIOCompletionPort   HANDLE      ?               ; completion port the worker threads wait on
    dwIOThreadCount     DWORD       ?               ; number of IO worker threads created
FASTSERVER              ENDS

PFASTSERVER             TYPEDEF PTR FASTSERVER
; Per-connection state. A pointer to one of these is registered as the
; completion key of the accepted socket, so the worker thread that dequeues
; a completion gets it back.
CONTEXT_KEY             STRUCT
    socket              SOCKET      ?               ; the accepted client socket
    ovIn                OVERLAPPED  <>              ; OVERLAPPED passed to WSARecv
    ovOut               OVERLAPPED  <>              ; OVERLAPPED passed to WSASend
    dwRecvBytes         DWORD       ?               ; byte count written by WSARecv
    dwSendBytes         DWORD       ?               ; byte count written by WSASend
    dwFlags             DWORD       ?               ; flags word shared by the recv/send calls
    wsaInBuf            WSABUF      <>              ; descriptor for chInBuf
    wsaOutBuf           WSABUF      <>              ; descriptor for chOutBuf
    chInBuf             BYTE        32 dup(?)       ; receive buffer
    chOutBuf            BYTE        32 dup(?)       ; send (echo) buffer
CONTEXT_KEY             ENDS

PCONTEXT_KEY            TYPEDEF PTR CONTEXT_KEY
; ----------------------------------------------------------------------------- | |
; | |
.data

; The one and only server instance, passed by address from the entry point.
server                              FASTSERVER <>

; Console messages written with StdOut. Every message starts with CR,LF;
; some also end with CR,LF. All are zero terminated.
szGetVersionExFailed                db 13,10,"Failed to get Windows version.",0
szOsVersionIncorrect                db 13,10,"Windows NT 3.51 or later required.",0
szCreateListenSocketFailed          db 13,10,"Failed to create a socket to listen on.",0
szBindFailed                        db 13,10,"Failed to bind socket local address and port.",0
szCompletionPortFailed              db 13,10,"Failed to create IO Completion Port.",0
szThreadStart                       db 13,10,"IO Worker Thread Started.",0
szThreadEnd                         db 13,10,"IO Worker Thread Ended.",0
szThreadEvent                       db 13,10,"IO Worker Thread IO Event.",13,10,0
szListening                         db 13,10,"Listening for connections.",13,10,0
szAcceptedSocket                    db 13,10,"Accepted new socket connection.",13,10,0
szAcceptInvalidSocket               db 13,10,"Accept got an invalid socket.",0
; Spelling of "socket" corrected in the message below (was "scoket").
szAcceptSocketAssociateFailed       db 13,10,"Failed to associate accepted socket with completion port.",0
szReadFailed                        db 13,10,"Socket read failed.",0
szReadFailedFatal                   db 13,10,"Socket read failed *fatally*.",0
szGetQueuedCompletionStatusFailed   db 13,10,"Get of a queued completion *failed*",13,10,0
szEndOfFile1                        db 13,10,"End Of File (1).",13,10,0
szEndOfFile2                        db 13,10,"End Of File (2).",13,10,0
szWriteFailed                       db 13,10,"Write to socket *failed*.",13,10,0
.code | |
; ----------------------------------------------------------------------------- | |
; | |
align 4 | |
; -----------------------------------------------------------------------------
; issue_read_request - queue one overlapped WSARecv on a connection.
;
; ABI:   32-bit stdcall.
; In:    pContextKey = per-connection CONTEXT_KEY whose socket field is set.
; Out:   eax is not a meaningful result.
; Clobb: eax, ecx, edx, flags.
;
; The receive targets chInBuf (32 bytes) through wsaInBuf and uses ovIn as the
; OVERLAPPED. WSARecv returning 0, or failing with WSA_IO_PENDING, both mean
; the read has been accepted, so the routine simply returns. A small set of
; resource errors is retried after a short sleep, at most 6 attempts in total
; (the count is arbitrary); any other error is reported as fatal.
;
issue_read_request proc pContextKey:PCONTEXT_KEY
    local   dwRetriesLeft:DWORD

    mov     dwRetriesLeft, 6

issue_read_loop:
    ; edx is volatile across calls, so the context pointer is reloaded on
    ; every pass through the loop.
    mov     edx, pContextKey
    mov     [edx].CONTEXT_KEY.wsaInBuf.len, 32
    lea     ecx, [edx].CONTEXT_KEY.chInBuf
    mov     [edx].CONTEXT_KEY.wsaInBuf.buf, ecx
    ; lpFlags is an in/out argument of WSARecv: start every request from 0
    ; rather than from whatever an earlier call left behind.
    mov     [edx].CONTEXT_KEY.dwFlags, 0
    invoke  WSARecv, [edx].CONTEXT_KEY.socket, addr [edx].CONTEXT_KEY.wsaInBuf, 1,
            addr [edx].CONTEXT_KEY.dwRecvBytes, addr [edx].CONTEXT_KEY.dwFlags,
            addr [edx].CONTEXT_KEY.ovIn, NULL
    test    eax, eax
    jnz     issue_read_check_error

    ; Read succeeded immediately. Nothing to do here: the caller's worker
    ; loop processes the data when it dequeues the completion.
    ret

issue_read_check_error:
    ; A pending IO is the normal, good case for an overlapped read.
    invoke  WSAGetLastError
    cmp     eax, WSA_IO_PENDING
    jne     issue_read_classify
    ret

issue_read_classify:
    ; Errors that may clear up on their own are retried.
    cmp     eax, ERROR_INVALID_USER_BUFFER
    je      issue_read_retry
    cmp     eax, ERROR_NOT_ENOUGH_QUOTA
    je      issue_read_retry
    cmp     eax, ERROR_NOT_ENOUGH_MEMORY
    je      issue_read_retry

    ; Read failed with an error we cannot recover from.
    invoke  StdOut, addr szReadFailedFatal
    ret

issue_read_retry:
    ; Back off briefly, then try again unless the attempts are used up.
    invoke  Sleep, 50
    dec     dwRetriesLeft
    jnz     issue_read_loop

    ; Every attempt hit a retryable error: give up and say so.
    invoke  StdOut, addr szReadFailed
    ret
issue_read_request endp
; ----------------------------------------------------------------------------- | |
; | |
align 4 | |
; -----------------------------------------------------------------------------
; io_thread_func - IO worker thread: waits on the completion port and echoes
; received data back to the client.
;
; ABI:   32-bit stdcall, used as the start routine given to CreateThread.
; In:    pServer = FASTSERVER whose hIOCompletionPort is valid.
; Out:   eax = 0 when asked to stop, 1 when the completion port wait failed.
; Saves: esi, edi (via USES); eax, ecx, edx and flags are clobbered.
;
; For each dequeued completion:
;   * wait failed and nothing was dequeued       -> report it and exit thread
;   * dequeued packet without an OVERLAPPED      -> treated as a stop request
;   * the IO itself failed, or 0 bytes were read -> close and free the connection
;   * otherwise                                  -> echo the bytes, queue next read
;
; The accept path stores an event handle with its low bit set in ovOut.hEvent
; so that completed writes are not queued to the port; the packets handled
; here are therefore expected to be read completions.
;
; NOTE(review): the connection is freed while an echo send could still be in
; flight, and chOutBuf is reused for the next echo without waiting for the
; previous send. Both hazards pre-date this rewrite and are left as they are.
;
io_thread_func proc uses esi edi pServer:PFASTSERVER
    local   hIOCompletionPort:HANDLE
    local   dwNumRead:DWORD
    local   pContextKey:PCONTEXT_KEY
    local   pOverlapped:PTR OVERLAPPED

    invoke  StdOut, addr szThreadStart
    mov     edx, pServer
    mov     ecx, [edx].FASTSERVER.hIOCompletionPort
    mov     hIOCompletionPort, ecx

    ; The thread blocks here until a completion packet is available,
    ; processes it, and then comes back to wait for the next one.
io_thread_func_loop:
    invoke  GetQueuedCompletionStatus, hIOCompletionPort, addr dwNumRead,
            addr pContextKey, addr pOverlapped, INFINITE
    cmp     pOverlapped, NULL               ; cmp leaves eax (the API result) intact
    jne     io_thread_have_packet
    test    eax, eax
    jnz     io_thread_stop

    ; Serious error: the wait failed and no packet was dequeued.
    invoke  StdOut, addr szGetQueuedCompletionStatusFailed
    mov     eax, 1
    ret

io_thread_stop:
    ; A packet with no OVERLAPPED does not belong to any socket IO (it is what
    ; PostQueuedCompletionStatus produces). Its key is not dereferenced;
    ; the packet is taken as the request to end this thread.
    invoke  StdOut, addr szThreadEnd
    xor     eax, eax
    ret

io_thread_have_packet:
    test    eax, eax
    jnz     io_thread_io_ok
    ; The IO operation behind this packet failed.
    invoke  StdOut, addr szEndOfFile1
    jmp     io_thread_close

io_thread_io_ok:
    cmp     dwNumRead, 0
    jne     io_thread_echo
    ; A successful read of zero bytes: the peer closed the connection.
    invoke  StdOut, addr szEndOfFile2

io_thread_close:
    ; Release everything owned by the connection. esi survives the calls.
    mov     esi, pContextKey
    invoke  closesocket, [esi].CONTEXT_KEY.socket
    mov     eax, [esi].CONTEXT_KEY.ovOut.hEvent
    and     eax, not 1                      ; strip the "no completion packet" tag bit
    jz      io_thread_free_context          ; no event was stored
    invoke  CloseHandle, eax
io_thread_free_context:
    invoke  GetProcessHeap
    invoke  HeapFree, eax, 0, esi
    jmp     io_thread_func_loop

io_thread_echo:
    ; Echo everything that was read. dwNumRead cannot exceed the length of the
    ; receive buffer described by wsaInBuf, and chOutBuf has the same size as
    ; chInBuf, so the copy stays inside chOutBuf.
    ; TODO: Modify so we only send back out when buffer full or newline received.
    mov     edx, pContextKey
    mov     ecx, dwNumRead
    mov     [edx].CONTEXT_KEY.wsaOutBuf.len, ecx
    lea     esi, [edx].CONTEXT_KEY.chInBuf
    lea     edi, [edx].CONTEXT_KEY.chOutBuf
    mov     [edx].CONTEXT_KEY.wsaOutBuf.buf, edi
    cld
    rep movsb                               ; chOutBuf[0..n) = chInBuf[0..n)
    mov     [edx].CONTEXT_KEY.dwFlags, 0    ; reset the in/out flags used by the next read
    ; dwFlags of WSASend is passed by value (0), not by address.
    invoke  WSASend, [edx].CONTEXT_KEY.socket, addr [edx].CONTEXT_KEY.wsaOutBuf, 1,
            addr [edx].CONTEXT_KEY.dwSendBytes, 0,
            addr [edx].CONTEXT_KEY.ovOut, NULL
    test    eax, eax
    jz      io_thread_next_read
    invoke  WSAGetLastError
    cmp     eax, WSA_IO_PENDING
    je      io_thread_next_read
    ; Failed to write data out.
    invoke  StdOut, addr szWriteFailed

io_thread_next_read:
    ; A new read must be issued or no more data will ever arrive.
    invoke  issue_read_request, pContextKey
    invoke  StdOut, addr szThreadEvent
    jmp     io_thread_func_loop
io_thread_func endp
; ----------------------------------------------------------------------------- | |
; | |
align 4 | |
; -----------------------------------------------------------------------------
; fast_server_run - start the server and accept connections forever.
;
; ABI:   32-bit stdcall.
; In:    pServer = FASTSERVER instance to fill in and run.
; Out:   Does not return once the accept loop is entered. Returns eax = 1 if
;        a start-up step fails (after printing the matching message).
; Clobb: eax, ecx, edx, flags.
;
; Steps: check the Windows platform, start Winsock, create and bind the listen
; socket (port 9080, any local address), create the IO completion port, start
; (2 * processors + 2) worker threads, listen, then loop: accept a client,
; allocate its CONTEXT_KEY, associate the socket with the port and queue the
; first read.
;
; NOTE(review): the results of WSAStartup and listen are still not checked;
; there is no message defined for those failures.
;
fast_server_run proc pServer:PFASTSERVER
    local   OSVersionInfo:OSVERSIONINFO
    local   SystemInfo:SYSTEM_INFO
    local   hIOCompletionPort:HANDLE
    local   dwIOThreadCount:DWORD
    local   dwThreadId:DWORD
    local   saClient:sockaddr
    local   dwClientSize:DWORD
    local   acceptSocket:SOCKET
    local   pContextKey:PCONTEXT_KEY
    local   hWriteEvent:HANDLE

    ; Check we are running under the right version of Windows. We can only
    ; run under versions of Windows NT (3.51, 4.0) or later. Includes XP.
    mov     OSVersionInfo.dwOSVersionInfoSize, sizeof OSVERSIONINFO
    invoke  GetVersionEx, addr OSVersionInfo
    test    eax, eax
    jnz     fast_server_check_platform
    invoke  StdOut, addr szGetVersionExFailed
    jmp     fast_server_fail

fast_server_check_platform:
    cmp     OSVersionInfo.dwPlatformId, VER_PLATFORM_WIN32_NT
    je      fast_server_start_winsock
    invoke  StdOut, addr szOsVersionIncorrect
    jmp     fast_server_fail

fast_server_start_winsock:
    ; Start the Windows Sockets subsystem, requesting version 2.2.
    mov     edx, pServer
    invoke  WSAStartup, 0202h, addr [edx].FASTSERVER.wsadata

    ; Create a socket that we can listen on for client connections.
    ; The overlapped attribute is requested explicitly.
    invoke  WSASocket, AF_INET, SOCK_STREAM, 0, NULL, 0, WSA_FLAG_OVERLAPPED
    cmp     eax, INVALID_SOCKET
    jne     fast_server_bind
    invoke  StdOut, addr szCreateListenSocketFailed
    jmp     fast_server_fail

fast_server_bind:
    mov     edx, pServer
    mov     [edx].FASTSERVER.ListenSocket, eax

    ; Bind a local address and port that clients can connect to.
    ; 9080 is the chosen port.
    invoke  htons, 9080d
    mov     edx, pServer                    ; edx was clobbered by the call
    mov     [edx].FASTSERVER.ListenAddr.sin_port, ax
    mov     [edx].FASTSERVER.ListenAddr.sin_family, AF_INET
    mov     [edx].FASTSERVER.ListenAddr.sin_addr.S_un.S_addr, INADDR_ANY
    invoke  bind, [edx].FASTSERVER.ListenSocket, addr [edx].FASTSERVER.ListenAddr, sizeof sockaddr_in
    cmp     eax, SOCKET_ERROR
    jne     fast_server_create_port
    invoke  StdOut, addr szBindFailed
    jmp     fast_server_fail

fast_server_create_port:
    ; Create an IO Completion Port. This first call does not associate a
    ; socket with the port, and the listening socket is never associated.
    ; The last argument (0) lets the system choose the concurrency value.
    invoke  CreateIoCompletionPort, INVALID_HANDLE_VALUE, NULL, 0, 0
    test    eax, eax
    jnz     fast_server_start_threads
    invoke  StdOut, addr szCompletionPortFailed
    jmp     fast_server_fail

fast_server_start_threads:
    mov     edx, pServer
    mov     [edx].FASTSERVER.hIOCompletionPort, eax
    mov     hIOCompletionPort, eax

    ; Create the IO worker threads: two per processor plus two. The threads
    ; block on the completion port and handle completion events. Completion
    ; routines are not used because a long-running routine could starve the
    ; handling of further events.
    ; Each thread handle is closed right away; that does not stop the thread.
    invoke  GetSystemInfo, addr SystemInfo
    mov     edx, SystemInfo.dwNumberOfProcessors
    add     edx, edx
    add     edx, 2
    mov     ecx, pServer
    mov     [ecx].FASTSERVER.dwIOThreadCount, edx
    mov     dwIOThreadCount, edx

fast_server_thread_loop:
    invoke  CreateThread, NULL, 0, io_thread_func, pServer, 0, addr dwThreadId
    test    eax, eax
    jz      fast_server_thread_next         ; creation failed: no handle to close
    invoke  CloseHandle, eax
fast_server_thread_next:
    dec     dwIOThreadCount
    jnz     fast_server_thread_loop

    ; Listen on the listening socket, which allows clients to connect.
    ; A backlog of 5 is requested.
    mov     edx, pServer
    invoke  listen, [edx].FASTSERVER.ListenSocket, 5
    invoke  StdOut, addr szListening

    ; Loop forever accepting new connections and reading from them.
fast_server_run_loop:
    ; The address length is an in/out argument, so it is reset for every
    ; call instead of once before the loop.
    mov     dwClientSize, sizeof sockaddr
    mov     edx, pServer
    invoke  WSAAccept, [edx].FASTSERVER.ListenSocket, addr saClient, addr dwClientSize, NULL, NULL
    cmp     eax, INVALID_SOCKET
    jne     fast_server_accepted
    invoke  StdOut, addr szAcceptInvalidSocket
    jmp     fast_server_run_loop

fast_server_accepted:
    mov     acceptSocket, eax
    mov     pContextKey, NULL               ; nothing else owned yet for this client
    mov     hWriteEvent, NULL
    invoke  StdOut, addr szAcceptedSocket

    ; Create the completion key that travels with the accepted socket: when
    ; IO completes on the socket, the worker thread gets this pointer back.
    ; HEAP_ZERO_MEMORY hands back zeroed memory, so no separate clear is needed.
    invoke  GetProcessHeap
    invoke  HeapAlloc, eax, HEAP_ZERO_MEMORY, sizeof CONTEXT_KEY
    test    eax, eax
    jz      fast_server_drop_connection     ; out of memory: drop this client
    mov     pContextKey, eax

    ; Create an event and tag its handle (low bit set) for use with writes,
    ; so that completed writes do not queue completion packets. Only read
    ; completions are wanted.
    invoke  CreateEvent, NULL, TRUE, FALSE, NULL
    test    eax, eax
    jz      fast_server_drop_connection
    mov     hWriteEvent, eax
    or      eax, 01h
    mov     edx, pContextKey
    mov     ecx, acceptSocket
    mov     [edx].CONTEXT_KEY.socket, ecx
    mov     [edx].CONTEXT_KEY.ovOut.hEvent, eax

    ; Associate the newly accepted socket and completion key with the
    ; IO Completion Port.
    invoke  CreateIoCompletionPort, acceptSocket, hIOCompletionPort, edx, 0
    test    eax, eax
    jnz     fast_server_first_read
    invoke  StdOut, addr szAcceptSocketAssociateFailed
    jmp     fast_server_drop_connection

fast_server_first_read:
    ; Issue a read request on the newly accepted socket. When it completes,
    ; one of the worker threads wakes up and receives the completion key.
    invoke  issue_read_request, pContextKey
    jmp     fast_server_run_loop

fast_server_drop_connection:
    ; Setting up the client failed part-way: release whatever was acquired
    ; for it and go back to accepting.
    invoke  closesocket, acceptSocket
    cmp     hWriteEvent, NULL
    je      fast_server_drop_context
    invoke  CloseHandle, hWriteEvent
fast_server_drop_context:
    cmp     pContextKey, NULL
    je      fast_server_run_loop
    invoke  GetProcessHeap
    invoke  HeapFree, eax, 0, pContextKey
    jmp     fast_server_run_loop

fast_server_fail:
    ; A start-up step failed and its message has been printed. Return a
    ; defined, non-zero value; the entry point passes eax to ExitProcess.
    mov     eax, 1
    ret
fast_server_run endp
; ----------------------------------------------------------------------------- | |
; Executable program starting/entry point. | |
; | |
; Program entry point: run the server on the global instance and use
; whatever fast_server_run leaves in eax as the process exit code.
start:
    push    offset server                   ; pServer
    call    fast_server_run
    push    eax                             ; uExitCode
    call    ExitProcess
end start
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment