Skip to main content

BENIGN or MALIGNANT Cancer Classification

import os
from glob import glob
from random import shuffle

import numpy as np
# import tensorflow as tf
import cv2
import matlab
from tqdm import tqdm
import dicom as pdicom
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
import scipy.ndimage
from skimage import morphology
from skimage import measure
from skimage.transform import resize
from sklearn.cluster import KMeans
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly.tools import FigureFactory as FF
from plotly.graph_objs import *
import matplotlib.pyplot as plt
# Initialise Plotly's offline notebook mode (imported from plotly.offline above).
init_notebook_mode(connected=True)
# Directories holding the labelled DICOM images used for training and testing.
TRAIN_DIR = '/home/naima.v/mc/CancerImages/Calc_Labelled_Train1'
TEST_DIR = '/home/naima.v/mc/CancerImages/Calc_Labelled_Test1'
# Every image is resized to IMG_SIZE x IMG_SIZE before it reaches the network.
IMG_SIZE = 50
# Learning rate handed to the optimizer.
LR = 1e-3
# Checkpoint name; encodes the learning rate and an architecture tag.
MODEL_NAME = 'CANCERDET-{}-{}.model2'.format(LR, '6conv-basic')


def readDCMImg(path):
    """Return the paths of the ``*.dcm`` files directly inside *path*.

    Args:
        path: Directory to search (the search is not recursive).

    Returns:
        The list of matching paths produced by ``glob``; empty when nothing
        matches.
    """
    # The list used to be computed and then discarded; returning it makes the
    # helper useful while staying harmless for callers that ignore the result.
    return glob(path + '/*.dcm')


readDCMImg("/home/naima.v/mc/CancerImages/Calc_Labelled_Train1")

def label_cancer_img(img):
    """Map an image's file name to a one-hot class label.

    The label word is the second-to-last '.'-separated component of
    ``img.filename`` (for example ``case1.BENIGN.dcm`` -> ``BENIGN``).

    Args:
        img: Object exposing a ``filename`` string attribute.

    Returns:
        ``[1, 0]`` for BENIGN, ``[0, 1]`` for MALIGNANT, and ``None`` for any
        other label word.

    Raises:
        IndexError: If ``img.filename`` contains no '.' at all.
    """
    tag = img.filename.split('.')[-2]
    # Built per call so every caller receives its own fresh list.
    # BENIGN_WITHOUT_CALLBACK is deliberately left unmapped (it was disabled
    # in the original code), so it falls through to None.
    one_hot_by_tag = {"BENIGN": [1, 0], "MALIGNANT": [0, 1]}
    return one_hot_by_tag.get(tag)




def med_Images(PathDicom):
    """List the DICOM files found directly inside *PathDicom*.

    Args:
        PathDicom: Directory to scan (not recursive).

    Returns:
        A list with ``os.path.join(PathDicom, name)`` for every entry whose
        lower-cased name contains ``".dcm"``; empty when there are none.
    """
    # The loop and the append had been swallowed into trailing comments, which
    # left the function with an indentation error and no way to fill the list.
    return [
        os.path.join(PathDicom, filename)
        for filename in os.listdir(PathDicom)
        if ".dcm" in filename.lower()  # case-insensitive DICOM check
    ]

def create_training_data():
    """Build, shuffle and save the labelled training set from TRAIN_DIR.

    Each DICOM file returned by ``med_Images(TRAIN_DIR)`` is read, labelled
    from its file name with ``label_cancer_img`` and resized to
    IMG_SIZE x IMG_SIZE.

    Returns:
        A list of ``[image_array, one_hot_label_array]`` pairs in shuffled
        order. The same list is also written to 'cancer_training_data.npy'.
    """
    training_data = []
    # Iterate only over the DICOM files; the helper's result used to be
    # computed and then ignored in favour of a raw directory listing.
    for dcm_path in tqdm(med_Images(TRAIN_DIR)):
        img_name = os.path.basename(dcm_path)
        print(TRAIN_DIR)
        print(img_name)
        path = os.path.abspath(TRAIN_DIR) + '/' + img_name
        print(path)
        dataset = pdicom.read_file(path)
        label = label_cancer_img(dataset)
        print(label)
        if label is None:
            # No recognised label in the file name: np.array(None) would not
            # be a valid one-hot target, so leave this sample out.
            continue
        img = cv2.resize(dataset.pixel_array, (IMG_SIZE, IMG_SIZE))
        training_data.append([np.array(img), np.array(label)])
    shuffle(training_data)
    np.save('cancer_training_data.npy', training_data)
    return training_data

def process_test_data():
    """Build, shuffle and save the test set from TEST_DIR.

    Each DICOM file returned by ``med_Images(TEST_DIR)`` is read and resized
    to IMG_SIZE x IMG_SIZE.

    Returns:
        A list of ``[image_array, image_id_array]`` pairs in shuffled order,
        where the id is the part of the file name before its first '.'.
        The same list is also written to 'cancer_test_data.npy'.
    """
    testing_data = []
    for dcm_path in tqdm(med_Images(TEST_DIR)):
        img_name = os.path.basename(dcm_path)
        path = os.path.abspath(TEST_DIR) + '/' + img_name
        dataset = pdicom.read_file(path)
        # Take the id from the bare file name. Splitting the full path on '.'
        # stops at the first dot of any directory name (TEST_DIR contains
        # 'naima.v'), which gave every image the same id.
        img_num = img_name.split('.')[0]
        # This resize had been swallowed into a trailing comment.
        img = cv2.resize(dataset.pixel_array, (IMG_SIZE, IMG_SIZE))
        testing_data.append([np.array(img), np.array(img_num)])
    shuffle(testing_data)
    np.save('cancer_test_data.npy', testing_data)
    return testing_data


# Build the shuffled training set at import time; create_training_data() also
# writes it to 'cancer_training_data.npy'.
train_data = create_training_data()


import tflearn
from tflearn.layers.conv import conv_2d,max_pool_2d
from tflearn.layers.core import input_data,dropout,fully_connected
from tflearn.layers.estimator import regression

# Network input: batches of IMG_SIZE x IMG_SIZE images with one channel.
convnet = input_data(shape=[None, IMG_SIZE, IMG_SIZE, 1], name='input')

# Six conv_2d(..., 5, relu) + max_pool_2d(..., 5) stages whose filter count
# alternates between 32 and 64.
for n_filters in (32, 64, 32, 64, 32, 64):
    convnet = conv_2d(convnet, n_filters, 5, activation='relu')
    convnet = max_pool_2d(convnet, 5)

# Dense head: 1024 ReLU units with dropout, then a 2-way softmax.
convnet = fully_connected(convnet, 1024, activation='relu')
convnet = dropout(convnet, 0.8)

convnet = fully_connected(convnet, 2, activation='softmax')
convnet = regression(
    convnet,
    optimizer='adam',
    learning_rate=LR,
    loss='categorical_crossentropy',
    name='targets'
)

model = tflearn.DNN(convnet, tensorboard_dir='log')


# Resume from a previously saved checkpoint when its '.meta' file is present.
if os.path.exists('{}.meta'.format(MODEL_NAME)):
    model.load(MODEL_NAME)
    # Call form of print works on both Python 2 and Python 3.
    print('Model loaded')

# Hold out the last 750 (already shuffled) samples for validation; everything
# before them is used for training.
train, test = train_data[:-750], train_data[-750:]

# Images are stacked and reshaped to (-1, IMG_SIZE, IMG_SIZE, 1); labels stay
# as plain lists.
X = np.array([sample[0] for sample in train]).reshape(-1, IMG_SIZE, IMG_SIZE, 1)
Y = [sample[1] for sample in train]

test_x = np.array([sample[0] for sample in test]).reshape(-1, IMG_SIZE, IMG_SIZE, 1)
test_y = [sample[1] for sample in test]

model.fit(
    {'input': X},
    {'targets': Y},
    n_epoch=4,
    validation_set=({'input': test_x}, {'targets': test_y}),
    snapshot_step=50000,
    show_metric=True,
    run_id=MODEL_NAME
)


import matplotlib.pyplot as plt
# Predict on the test set and show the first 12 images in a 3x4 grid, each
# titled with the predicted class.
test_data = process_test_data()
fig = plt.figure()


for num, data in enumerate(test_data[:12]):
    img_data = data[0]
    y = fig.add_subplot(3, 4, num + 1)
    model_out = model.predict([img_data.reshape(IMG_SIZE, IMG_SIZE, 1)])[0]
    # Index 1 is the MALIGNANT slot of the one-hot labels ([0, 1]).
    if np.argmax(model_out) == 1:
        str_label = 'Cancerous'
    else:
        str_label = 'Non - Cancerous'
    y.imshow(img_data, cmap='gray')
    plt.title(str_label)
    # Hide both axes; the x axis used to be hidden twice and the y axis never.
    y.axes.get_xaxis().set_visible(False)
    y.axes.get_yaxis().set_visible(False)

plt.show()

Comments

Popular posts from this blog

Coursera Course 3 Structuring Machine Learning Projects

Week One - Video One - Why ML Strategy: why we should care about ML strategy. When we try to improve the performance of a system there are a lot of things we can consider. They are: - amount of data - amount of diverse data - training the algorithm longer with gradient descent - using another optimization algorithm such as Adam - using a bigger or smaller network depending on our requirement - using dropout - adding L2 regularization - network architecture parameters such as the number of hidden units, the activation function, etc. Second Video - Orthogonalization: orthogonalization means that in a deep learning network we can change/tune many things, e.g. hyperparameters, to get better performance from the network. The most effective people know what to tune in order to achieve a particular effect. For every kind of problem there is a separate solution. Don't mix up the problems and the solutions. For that, we should first find out where the problem is, whether it is with training ...

Converting DICOM images into JPG Format in Centos

Converting DICOM images into JPG Format in Centos. I wanted to work on medical image classification using deep learning. The image data set was in .dcm format, so the following steps were performed to convert the images to JPG format. I used the ImageMagick software: http://www.ofzenandcomputing.com/batch-convert-image-formats-imagemagick/ I installed ImageMagick on Centos by downloading the RPM and installing its libraries: rpm -Uvh ImageMagick-libs-7.0.7-10.x86_64.rpm rpm -Uvh ImageMagick-7.0.7-10.x86_64.rpm After installation, change to the directory that contains the images to be converted. Inside that directory, execute the command: mogrify -format jpg *.dcm Now the .dcm images are converted to JPG format.