1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87
| from PIL import Image, ImageFile import os import hashlib import numpy as np from tqdm import tqdm
def get_img_sha1(img):
    """Return the upper-case SHA-1 hex digest of an image's raw data.

    The script uses this digest as the file name of a cropped tile. Only the
    element bytes (in C order) are hashed; the array's shape and dtype are
    not part of the digest.

    :param img: anything ``np.asarray`` can convert, e.g. a PIL image or an
        ndarray.
    :return: 40-character upper-case hexadecimal string.
    """
    # tobytes() always yields a C-ordered copy, so strided or otherwise
    # non-contiguous arrays hash fine instead of being rejected by hashlib's
    # buffer check; for contiguous input the bytes are the same ones the
    # buffer interface would expose.
    raw_bytes = np.asarray(img).tobytes()
    return hashlib.sha1(raw_bytes).hexdigest().upper()
def cut_label(path):
    """Crop the label region out of a captcha image.

    :param path: path of the captcha image file.
    :return: a new PIL image covering the box from (117, 0) to (180, 30),
        i.e. 63 pixels wide and 30 pixels high.
    """
    # PIL crop boxes are (left, upper, right, lower) edges, not
    # (x, y, width, height).
    label_box = (117, 0, 180, 30)
    # The context manager closes the underlying file even if cropping fails;
    # crop() hands back a separate image, so the result outlives the source.
    with Image.open(path) as image:
        return image.crop(label_box)
def cut_image(path):
    """Cut the eight tiles out of a captcha image.

    The tiles are read from a grid of 4 columns by 2 rows. Each tile is a
    67x67 pixel square; grid cells repeat every 72 pixels, the first column
    starts at x=5 and the first row at y=41.

    :param path: path of the captcha image file.
    :return: tuple of eight PIL images ordered column by column:
        (col 0, row 0), (col 0, row 1), (col 1, row 0), ... (col 3, row 1).
    """
    tile_size = 67
    space = tile_size + 5  # distance between the origins of adjacent cells
    left_margin = 5
    top_offset = 41

    # The context manager closes the underlying file once all tiles are cut;
    # the tuple is fully built before the ``with`` block exits.
    with Image.open(path) as image:
        return tuple(
            image.crop((
                col * space + left_margin,            # left
                row * space + top_offset,             # upper
                (col + 1) * space,                    # right
                row * space + top_offset + tile_size, # lower
            ))
            for col in range(4)
            for row in range(2)
        )
# Ask PIL to load image files whose data is truncated instead of raising.
ImageFile.LOAD_TRUNCATED_IMAGES = True

# Input and output folders are resolved against the current working directory.
captcha_input_dir = os.path.join(os.getcwd(), "originCaptcha")
captcha_output_dir = os.path.join(os.getcwd(), "cutedCaptcha")

# Every file found anywhere below the input directory is treated as a captcha.
captcha_path_list = [
    os.path.join(root, img)
    for root, dirs, imgs in os.walk(captcha_input_dir)
    for img in imgs
]

with tqdm(total=len(captcha_path_list), desc="Cut captcha") as pbar:
    for captcha_input_path in captcha_path_list:
        # splitext() strips only the final extension, so names that contain
        # extra dots ("a.1.jpg", "a.2.jpg") keep distinct output folders.
        original_captcha_name = os.path.splitext(
            os.path.basename(captcha_input_path)
        )[0]
        captcha_output_dir_second = os.path.join(
            captcha_output_dir, original_captcha_name
        )

        tiles = cut_image(captcha_input_path)

        # One output folder per captcha, created once; exist_ok avoids the
        # check-then-create race of a separate os.path.exists() test.
        os.makedirs(captcha_output_dir_second, exist_ok=True)

        # Each tile is stored under the SHA-1 of its pixel data, so identical
        # tiles within one captcha end up in the same file.
        for tile in tiles:
            captcha_output_path = os.path.join(
                captcha_output_dir_second, get_img_sha1(tile) + ".jpg"
            )
            tile.save(captcha_output_path)

        # The label crop is saved next to the tiles as "label_<name>.jpg".
        captcha_output_label_name = "label_" + original_captcha_name + ".jpg"
        captcha_output_label_path = os.path.join(
            captcha_output_dir_second, captcha_output_label_name
        )
        cut_label(captcha_input_path).save(captcha_output_label_path)

        pbar.update()
|