Last active
January 1, 2016 10:09
-
-
Save onesuper/8129184 to your computer and use it in GitHub Desktop.
liugu bfs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| __global__ void | |
| New_BFSKernel_multiBlock_2LQ_inGPU(int * workset,int * new_workset, Node* g_graph_node, Edge * g_graph_edge, int * visited, int * cost, int worksetsize, int * newsize,int level,int *flag,int *special_q) | |
| { | |
| int tid = blockIdx.x*blockDim.x + threadIdx.x; | |
| count=0; | |
| global_level = level; | |
| global_worksize = worksetsize; | |
| global_flag = 0; | |
| odd = 0; | |
| global_newsize = 0; | |
| global_fold =0; | |
| __shared__ int local_q[LOCAL_MEM]; | |
| __shared__ int local_q_size; | |
| __shared__ int old_pos; | |
| __syncthreads(); | |
| int k = 0; | |
| while(1){ | |
| if(threadIdx.x==0){ | |
| local_q_size = 0; | |
| old_pos=0; | |
| } | |
| __syncthreads(); | |
| if(tid < global_worksize){ | |
| int id; | |
| if( odd%2==0 ){ //first time read from workset | |
| id = workset[tid]; | |
| workset[tid] = 0; | |
| } | |
| else{ | |
| id = new_workset[tid]; | |
| new_workset[tid]=0; | |
| } | |
| cost[id] = global_level; | |
| //Node curNode = g_graph_node[id]; | |
| Node curNode = tex1Dfetch(g_graph_node_ref,id); | |
| if( curNode.y > 128 ){ | |
| int old_special_pos = atomicAdd( (int *)&special_q_size,1); | |
| special_q[old_special_pos]=id; | |
| atomicAdd( (int *)&special_node_num,1); | |
| }else{ | |
| for(int i = curNode.x; i < curNode.x+curNode.y; i++){ | |
| //int nid= g_graph_edge[i].x; | |
| Edge curEdge = tex1Dfetch(g_graph_edge_ref,i); | |
| int nid = curEdge.x; | |
| int old_visited_value = atomicExch((int*)&visited[nid],1); | |
| if( old_visited_value == 0 ){ //this guarantees that only one thread update the workset | |
| // int old_pos= atomicAdd((int*)&(global_newsize),1); | |
| // if(odd%2 == 0) new_workset[old_pos] = nid; | |
| // else workset[old_pos] =nid; | |
| int old_local_pos = atomicAdd( (int*)&local_q_size,1); | |
| local_q[old_local_pos] = nid; | |
| } | |
| } | |
| } | |
| } | |
| __syncthreads(); | |
| //while(1/*special_q_size > 0*/){ | |
| for(int t = 0; t<special_node_num;t++){ | |
| int special_id = special_q[special_q_size-1]; | |
| Node special_node = tex1Dfetch(g_graph_node_ref,special_id); | |
| for(int i = tid; i < special_node.y; i+=gridDim.x*blockDim.x){ | |
| Edge curEdge_s = tex1Dfetch(g_graph_edge_ref, special_node.x+i ); | |
| int nid_s = curEdge_s.x; | |
| int old_visited_value_s = atomicExch( (int *)&visited[nid_s],1); | |
| if(old_visited_value_s ==0){ | |
| int old_local_pos = atomicAdd( (int*)&local_q_size,1); | |
| local_q[old_local_pos] = nid_s; | |
| } | |
| } | |
| if(tid==0){ | |
| special_q_size --; | |
| } | |
| } | |
| __syncthreads(); | |
| if(threadIdx.x==0){ | |
| old_pos = atomicAdd((int*)&global_newsize,local_q_size); | |
| } | |
| __syncthreads(); | |
| for(int i = threadIdx.x; i<local_q_size; i+=blockDim.x){ | |
| if(odd%2 ==0 ) new_workset[old_pos+i] = local_q[i]; | |
| else workset[old_pos+i] = local_q[i]; | |
| } | |
| start_global_barrier(k+1); | |
| k++; | |
| if(tid==0){ | |
| quitout=0; | |
| if((global_newsize > THREAD_PER_BLOCK*gridDim.x) || (global_newsize <= THREAD_PER_BLOCK*(gridDim.x-1))||global_newsize ==0) | |
| quitout=1; | |
| global_worksize=global_newsize; | |
| global_newsize=0; | |
| odd++; | |
| global_level++; | |
| special_node_num = 0; | |
| } | |
| start_global_barrier(k+1); | |
| k+=1; | |
| if(quitout==1){ | |
| if(tid==0){ | |
| *flag = global_level; | |
| *newsize = global_worksize; | |
| } | |
| return; | |
| } | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment