BENIGN or MALIGNANT Cancer Classification

import os
from glob import glob
from random import shuffle

import numpy as np
# import tensorflow as tf
import cv2
import matlab
from tqdm import tqdm
import dicom as pdicom
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
import scipy.ndimage
from skimage import morphology
from skimage import measure
from skimage.transform import resize
from sklearn.cluster import KMeans
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly.tools import FigureFactory as FF
from plotly.graph_objs import *
import matplotlib.pyplot as plt
# Initialise plotly's offline notebook mode (init_notebook_mode comes from
# plotly.offline). Presumably this script is meant to run inside a Jupyter
# notebook -- TODO confirm.
init_notebook_mode(connected=True)
# Directories holding the labelled training and test DICOM images.
TRAIN_DIR = '/home/naima.v/mc/CancerImages/Calc_Labelled_Train1'
TEST_DIR = '/home/naima.v/mc/CancerImages/Calc_Labelled_Test1'
# Every image is resized to IMG_SIZE x IMG_SIZE before it reaches the network.
IMG_SIZE = 50
# Learning rate handed to the optimizer.
LR = 1e-3
# Name used to look the model up on disk and as the training run id.
MODEL_NAME = 'CANCERDET-{}-{}.model2'.format(LR, '6conv-basic')


def readDCMImg(path):
    """Return the paths of all ``*.dcm`` files directly inside *path*.

    Args:
        path: Directory to search (sub-directories are not searched).

    Returns:
        The list of matching paths as produced by ``glob``; empty when the
        directory contains no ``.dcm`` files or does not exist.
    """
    # The glob result used to be assigned and then dropped, which made the
    # function a no-op; hand it back to the caller instead.
    return glob(path + '/*.dcm')


readDCMImg("/home/naima.v/mc/CancerImages/Calc_Labelled_Train1")

def label_cancer_img(img):
    """Return the one-hot class label encoded in a DICOM file's name.

    The label is the second-to-last dot-separated token of the file's base
    name, e.g. ``<name>.BENIGN.dcm``.

    Args:
        img: Object exposing a ``filename`` attribute (here, the dataset
            returned by ``pdicom.read_file``).

    Returns:
        ``[1, 0]`` for ``BENIGN``, ``[0, 1]`` for ``MALIGNANT`` and ``None``
        when the name carries any other label token or none at all.
    """
    # Parse the base name only: dots in directory names (e.g. "naima.v")
    # must not be mistaken for label separators.
    parts = os.path.basename(img.filename).split('.')
    if len(parts) < 2:
        return None
    one_hot_by_label = {
        "BENIGN": [1, 0],
        "MALIGNANT": [0, 1],
        # "BENIGN_WITHOUT_CALLBACK": [1, 0],
    }
    return one_hot_by_label.get(parts[-2])




def med_Images(PathDicom):
    """List the DICOM files found directly inside a directory.

    Args:
        PathDicom: Directory to scan (sub-directories are not scanned).

    Returns:
        A list with ``PathDicom`` joined to the name of every entry whose
        name ends in ``.dcm`` (compared case-insensitively), in
        ``os.listdir`` order.
    """
    return [
        os.path.join(PathDicom, filename)
        for filename in os.listdir(PathDicom)
        # Match on the extension so names such as "scan.dcm.bak" are ignored.
        if filename.lower().endswith(".dcm")
    ]

def create_training_data():
    """Build, shuffle and save the training set from the DICOM files in TRAIN_DIR.

    Each DICOM file is read with ``pdicom.read_file``; its pixel array is
    resized to ``IMG_SIZE`` x ``IMG_SIZE`` and stored together with the
    one-hot label returned by ``label_cancer_img``. Files whose name carries
    no recognised label are skipped.

    Returns:
        The shuffled list of ``[image_array, label_array]`` pairs. The same
        list is also written to ``cancer_training_data.npy``.
    """
    training_data = []
    # Iterate the filtered DICOM list instead of every directory entry, so
    # stray non-DICOM files are never handed to the DICOM reader.
    lstFilesDCM = med_Images(TRAIN_DIR)
    for dcm_path in tqdm(lstFilesDCM):
        img_name = os.path.basename(dcm_path)
        print(TRAIN_DIR)
        print(img_name)
        path = os.path.abspath(TRAIN_DIR) + '/' + img_name
        img = pdicom.read_file(path)
        label = label_cancer_img(img)
        print(label)
        if label is None:
            # An unlabelled sample would put np.array(None) among the
            # targets, which cannot be used for training.
            continue
        img = cv2.resize(img.pixel_array, (IMG_SIZE, IMG_SIZE))
        training_data.append([np.array(img), np.array(label)])
    shuffle(training_data)
    np.save('cancer_training_data.npy', training_data)
    return training_data

def process_test_data():
    """Build, shuffle and save the unlabelled test set from the DICOM files in TEST_DIR.

    Each DICOM file is read with ``pdicom.read_file`` and its pixel array is
    resized to ``IMG_SIZE`` x ``IMG_SIZE``. The image id is the part of the
    file's base name before the first dot.

    Returns:
        The shuffled list of ``[image_array, id_array]`` pairs. The same list
        is also written to ``cancer_test_data.npy``.
    """
    testing_data = []
    # Only entries recognised as DICOM files are read.
    for dcm_path in tqdm(med_Images(TEST_DIR)):
        path = os.path.abspath(TEST_DIR) + '/' + os.path.basename(dcm_path)
        img = pdicom.read_file(path)
        # Take the id from the base name: splitting the full path on '.'
        # stops at the first dotted directory (e.g. "naima.v") and would
        # give every image the same id.
        img_num = os.path.basename(img.filename).split('.')[0]
        img = cv2.resize(img.pixel_array, (IMG_SIZE, IMG_SIZE))
        testing_data.append([np.array(img), np.array(img_num)])
    shuffle(testing_data)
    np.save('cancer_test_data.npy', testing_data)
    return testing_data


# Build the training set when the script runs; create_training_data() also
# writes it to cancer_training_data.npy.
train_data = create_training_data()


import tflearn
from tflearn.layers.conv import conv_2d,max_pool_2d
from tflearn.layers.core import input_data,dropout,fully_connected
from tflearn.layers.estimator import regression

# Network definition: input layer, six conv_2d/max_pool_2d stages, one
# 1024-unit fully connected layer with dropout, then a 2-way softmax output.
convnet = input_data(shape=[None, IMG_SIZE, IMG_SIZE, 1], name='input')

# The six stages differ only in filter count, which alternates 32/64.
for n_filters in (32, 64, 32, 64, 32, 64):
    convnet = conv_2d(convnet, n_filters, 5, activation='relu')
    convnet = max_pool_2d(convnet, 5)

convnet = fully_connected(convnet, 1024, activation='relu')
convnet = dropout(convnet, 0.8)

# Two output units, matching the two-element one-hot labels.
convnet = fully_connected(convnet, 2, activation='softmax')
convnet = regression(
    convnet,
    optimizer='adam',
    learning_rate=LR,
    loss='categorical_crossentropy',
    name='targets',
)

model = tflearn.DNN(convnet, tensorboard_dir='log')


# Reuse a previously stored model when its ".meta" file is present on disk.
checkpoint_meta = '{}.meta'.format(MODEL_NAME)
if os.path.exists(checkpoint_meta):
    model.load(MODEL_NAME)
    print('Model loaded')

# Hold out the last 750 (already shuffled) samples for validation and train
# on the rest.
# NOTE(review): with 750 or fewer samples in total, `train` ends up empty.
train = train_data[:-750]
test = train_data[-750:]

# Each sample is an [image, label] pair; images are reshaped to the
# (IMG_SIZE, IMG_SIZE, 1) layout declared by the 'input' layer.
X = np.array([i[0] for i in train]).reshape(-1, IMG_SIZE, IMG_SIZE, 1)
Y = [i[1] for i in train]

test_x = np.array([i[0] for i in test]).reshape(-1, IMG_SIZE, IMG_SIZE, 1)
test_y = [i[1] for i in test]

model.fit({'input': X}, {'targets': Y}, n_epoch=4,
          validation_set=({'input': test_x}, {'targets': test_y}),
          snapshot_step=50000, show_metric=True, run_id=MODEL_NAME)

# Persist the trained model: the "<MODEL_NAME>.meta" existence check that
# guards model.load() can only succeed if a model was saved under MODEL_NAME.
model.save(MODEL_NAME)


import matplotlib.pyplot as plt
# Run the model on the test images and show the first 12 predictions in a
# 3 x 4 grid of subplots.
test_data = process_test_data()
fig = plt.figure()


for num, data in enumerate(test_data[:12]):
    img_num = data[1]
    img_data = data[0]
    y = fig.add_subplot(3, 4, num + 1)
    orig = img_data
    data = img_data.reshape(IMG_SIZE, IMG_SIZE, 1)
    model_out = model.predict([data])[0]
    # Index 1 is the MALIGNANT slot of the one-hot label ([0, 1]).
    if np.argmax(model_out) == 1:
        str_label = 'Cancerous'
    else:
        str_label = 'Non - Cancerous'
    y.imshow(orig, cmap='gray')
    plt.title(str_label)
    # Hide both axes; the x axis used to be hidden twice and the y axis never.
    y.axes.get_xaxis().set_visible(False)
    y.axes.get_yaxis().set_visible(False)

plt.show()

Coursera Course 3 Structuring Machine Learning Projects

Week One - Video One - Why ML Strategy. Why we should care about ML strategy: when we try to improve the performance of a system, there are many things we could consider. They are: - Amount of data - Amount of diverse data - Training the algorithm longer with gradient descent - Using another optimization algorithm, such as Adam - Using a bigger or smaller network, depending on our requirements - Using dropout - Adding L2 regularization - Network architecture parameters such as the number of hidden units, the activation function, etc. Second Video - Orthogonalization. Orthogonalization means that in a deep learning network we can change/tune many things (e.g. hyperparameters) to get better performance from the network. The most effective people know what to tune in order to achieve a particular effect. For every kind of problem there is a separate solution. Don't mix up the problems and solutions. For that, we should first find out where the problem is, whether it is with training ...

TechVision

Search This Blog

BENIGN or MALIGNANT Cancer Classification

Comments

Post a Comment

Popular posts from this blog

Explore python Libraries - Numpy, Scipy, Matplotlib

Coursera Course 3 Structuring Machine Learning Projects

Converting DICOM images into JPG Format in Centos