C) The Script, But Not Complete, Part 1

The document outlines a comprehensive project for removing text from manga and comics using advanced detection and inpainting techniques, fully compatible with Google Colab. It covers installing the required dependencies, setting up multiple text detection methods (EasyOCR, PaddleOCR, EAST, and other OpenCV-based detectors), and specialized detection for manga-specific elements such as speech bubbles, sound effects, and handwritten text. The goal is a robust, multi-method solution for text removal in comic images.


# Complete Manga/Comic Text Removal Project
# Advanced solution with multiple detection methods and inpainting techniques
# Fully compatible with Google Colab

import os
import sys
import cv2
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw, ImageFilter, ImageEnhance
import torch
import torchvision.transforms as transforms
from pathlib import Path
import requests
import zipfile
import gdown
from typing import List, Tuple, Optional, Dict
import warnings
import json
import time
from tqdm import tqdm
import gc
warnings.filterwarnings('ignore')

# ======================= INSTALLATION SETUP =======================

def install_all_dependencies():
    """Complete dependency installation for Google Colab"""
    print("Installing all required packages... This may take a few minutes.")

    # Core packages
    packages = [
        "torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118",
        "opencv-python-headless",
        "pillow>=9.0.0",
        "numpy>=1.21.0",
        "matplotlib>=3.5.0",
        "tqdm",
        "scipy",
        "scikit-image",
        "scikit-learn"
    ]

    # OCR packages
    ocr_packages = [
        "easyocr",
        "paddlepaddle-gpu" if torch.cuda.is_available() else "paddlepaddle",
        "paddleocr>=2.6.0"
    ]

    # AI/ML packages
    ai_packages = [
        "transformers>=4.20.0",
        "diffusers>=0.21.0",
        "accelerate>=0.20.0",
        "controlnet-aux",
        "xformers" if torch.cuda.is_available() else "",
        "segment-anything",
        "ultralytics>=8.0.0"
    ]

    # Additional utilities
    util_packages = [
        "imageio",
        "imageio-ffmpeg",
        "gradio",
        "ipywidgets"
    ]

    all_packages = packages + ocr_packages + ai_packages + util_packages

    for package in all_packages:
        if package:  # Skip the empty string added for CPU-only setups
            try:
                print(f"Installing {package}...")
                os.system(f"pip install -q {package}")
            except Exception as e:
                print(f"Warning: Could not install {package}: {e}")

    # Additional setup for specific packages
    try:
        import nltk
        nltk.download('punkt', quiet=True)
    except Exception:
        pass

    print("✅ All dependencies installed successfully!")

# ======================= ADVANCED TEXT DETECTION =======================

class AdvancedTextDetector:
    """Multi-method text detection with manga/comic specialization"""

    def __init__(self):
        self.setup_all_detectors()
        self.detection_cache = {}

    def setup_all_detectors(self):
        """Initialize all available text detection methods"""
        print("🔧 Setting up text detection models...")

        # OCR readers
        self.detectors = {}

        # EasyOCR setup
        # Note: EasyOCR cannot mix several CJK scripts in one Reader ('ja'
        # may only be paired with 'en'), so a single ja+en reader is created
        # here; separate readers would be needed for ko/ch/th.
        try:
            import easyocr
            self.detectors['easyocr'] = easyocr.Reader(
                ['ja', 'en'],
                gpu=torch.cuda.is_available()
            )
            print("✅ EasyOCR initialized")
        except Exception as e:
            print(f"⚠️ EasyOCR failed: {e}")

        # PaddleOCR setup
        try:
            from paddleocr import PaddleOCR
            self.detectors['paddle_en'] = PaddleOCR(
                use_angle_cls=True,
                lang='en',
                show_log=False,
                use_gpu=torch.cuda.is_available()
            )
            self.detectors['paddle_ch'] = PaddleOCR(
                use_angle_cls=True,
                lang='ch',
                show_log=False,
                use_gpu=torch.cuda.is_available()
            )
            print("✅ PaddleOCR initialized")
        except Exception as e:
            print(f"⚠️ PaddleOCR failed: {e}")

        # CRAFT text detection (if available)
        try:
            self.setup_craft_detector()
        except Exception:
            print("⚠️ CRAFT detector not available")

        # OpenCV-based detectors
        self.setup_opencv_detectors()

        print(f"✅ Text detection setup complete! Available methods: {list(self.detectors.keys())}")

    def setup_craft_detector(self):
        """Setup CRAFT text detector for better comic text detection"""
        try:
            # Download the CRAFT model if it does not exist yet
            craft_path = "/content/craft_mlt_25k.pth"
            if not os.path.exists(craft_path):
                print("Downloading CRAFT model...")
                url = "https://github.com/clovaai/CRAFT-pytorch/releases/download/v1.0/craft_mlt_25k.pth"
                os.system(f"wget -q {url} -O {craft_path}")

            # Note: a full CRAFT implementation would go here.
            # For now, we use a placeholder.
            self.detectors['craft'] = None

        except Exception as e:
            print(f"CRAFT setup failed: {e}")

    def setup_opencv_detectors(self):
        """Setup OpenCV-based text detection methods"""
        # EAST text detector
        try:
            east_path = "/content/frozen_east_text_detection.pb"
            if not os.path.exists(east_path):
                print("Downloading EAST model...")
                url = "https://github.com/opencv/opencv_extra/raw/master/testdata/dnn/frozen_east_text_detection.pb"
                os.system(f"wget -q {url} -O {east_path}")
            self.detectors['east'] = cv2.dnn.readNet(east_path)
            print("✅ EAST detector initialized")
        except Exception as e:
            print(f"⚠️ EAST detector failed: {e}")

    def detect_text_comprehensive(self, image: np.ndarray,
                                  min_confidence: float = 0.3) -> List[Dict]:
        """
        Comprehensive text detection using all available methods

        Returns:
            List of detection dictionaries with bbox, confidence, method, text
        """
        results = []

        # Method 1: EasyOCR
        if 'easyocr' in self.detectors:
            results.extend(self._detect_with_easyocr(image, min_confidence))

        # Method 2: PaddleOCR
        if 'paddle_en' in self.detectors:
            results.extend(self._detect_with_paddle(image, min_confidence))

        # Method 3: EAST
        if 'east' in self.detectors:
            results.extend(self._detect_with_east(image, min_confidence))

        # Method 4: OpenCV methods
        results.extend(self._detect_with_opencv(image, min_confidence))

        # Method 5: Manga-specific detection
        results.extend(self._detect_manga_specific(image, min_confidence))

        # Merge and filter overlapping results
        merged_results = self._merge_detections(results)

        return merged_results
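
    # Expected call pattern (a sketch; "page.png" is a hypothetical input):
    #
    #   detector = AdvancedTextDetector()
    #   image = cv2.imread("page.png")
    #   detections = detector.detect_text_comprehensive(image, min_confidence=0.3)
    #   for d in detections:
    #       x1, y1, x2, y2 = d['bbox']
    #       cv2.rectangle(image, (x1, y1), (x2, y2), (0, 0, 255), 2)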

    def _detect_with_easyocr(self, image: np.ndarray,
                             min_confidence: float) -> List[Dict]:
        """EasyOCR detection"""
        results = []
        try:
            detections = self.detectors['easyocr'].readtext(image)
            for bbox, text, confidence in detections:
                if confidence >= min_confidence:
                    bbox_array = np.array(bbox, dtype=np.int32)
                    x_min, y_min = np.min(bbox_array, axis=0)
                    x_max, y_max = np.max(bbox_array, axis=0)

                    results.append({
                        'bbox': (x_min, y_min, x_max, y_max),
                        'confidence': confidence,
                        'method': 'easyocr',
                        'text': text,
                        'polygon': bbox
                    })
        except Exception as e:
            print(f"EasyOCR detection error: {e}")
        return results

    def _detect_with_paddle(self, image: np.ndarray,
                            min_confidence: float) -> List[Dict]:
        """PaddleOCR detection"""
        results = []

        for lang in ['paddle_en', 'paddle_ch']:
            if lang not in self.detectors:
                continue

            try:
                ocr_results = self.detectors[lang].ocr(image, cls=True)
                if ocr_results and ocr_results[0]:
                    for item in ocr_results[0]:
                        bbox, (text, confidence) = item
                        if confidence >= min_confidence:
                            bbox_array = np.array(bbox, dtype=np.int32)
                            x_min, y_min = np.min(bbox_array, axis=0)
                            x_max, y_max = np.max(bbox_array, axis=0)

                            results.append({
                                'bbox': (x_min, y_min, x_max, y_max),
                                'confidence': confidence,
                                'method': lang,
                                'text': text,
                                'polygon': bbox
                            })
            except Exception as e:
                print(f"{lang} detection error: {e}")

        return results

    def _detect_with_east(self, image: np.ndarray,
                          min_confidence: float) -> List[Dict]:
        """EAST detector"""
        results = []
        try:
            if 'east' not in self.detectors:
                return results

            net = self.detectors['east']
            height, width = image.shape[:2]

            # Prepare image for EAST (input size must be a multiple of 32)
            new_height, new_width = 320, 320
            ratio_h, ratio_w = height / new_height, width / new_width

            blob = cv2.dnn.blobFromImage(image, 1.0, (new_width, new_height),
                                         (123.68, 116.78, 103.94),
                                         swapRB=True, crop=False)

            net.setInput(blob)
            scores, geometry = net.forward(['feature_fusion/Conv_7/Sigmoid',
                                            'feature_fusion/concat_3'])

            # Decode predictions
            boxes, confidences = self._decode_east_predictions(scores, geometry,
                                                               min_confidence)

            # Apply non-maximum suppression
            indices = cv2.dnn.NMSBoxes(boxes, confidences, min_confidence, 0.4)

            if len(indices) > 0:
                for i in indices.flatten():
                    x, y, w, h = boxes[i]
                    # Scale back to original image coordinates
                    x = int(x * ratio_w)
                    y = int(y * ratio_h)
                    w = int(w * ratio_w)
                    h = int(h * ratio_h)

                    results.append({
                        'bbox': (x, y, x + w, y + h),
                        'confidence': confidences[i],
                        'method': 'east',
                        'text': '',
                        'polygon': [(x, y), (x + w, y), (x + w, y + h), (x, y + h)]
                    })

        except Exception as e:
            print(f"EAST detection error: {e}")

        return results

    def _decode_east_predictions(self, scores, geometry, min_confidence):
        """Decode EAST model predictions"""
        boxes = []
        confidences = []

        height, width = scores.shape[2:4]

        for y in range(height):
            scores_data = scores[0, 0, y]
            x_data0 = geometry[0, 0, y]
            x_data1 = geometry[0, 1, y]
            x_data2 = geometry[0, 2, y]
            x_data3 = geometry[0, 3, y]
            angles_data = geometry[0, 4, y]

            for x in range(width):
                if scores_data[x] < min_confidence:
                    continue

                offset_x, offset_y = x * 4.0, y * 4.0

                angle = angles_data[x]
                cos = np.cos(angle)
                sin = np.sin(angle)

                h = x_data0[x] + x_data2[x]
                w = x_data1[x] + x_data3[x]

                end_x = int(offset_x + (cos * x_data1[x]) + (sin * x_data2[x]))
                end_y = int(offset_y - (sin * x_data1[x]) + (cos * x_data2[x]))
                start_x = int(end_x - w)
                start_y = int(end_y - h)

                boxes.append([start_x, start_y, int(w), int(h)])
                confidences.append(float(scores_data[x]))

        return boxes, confidences
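
    # Note on the decoder above: the factor of 4.0 reflects EAST's output
    # stride. The score/geometry maps are 1/4 the resolution of the 320x320
    # network input, so map cell (x, y) corresponds to pixel (4x, 4y) in the
    # resized image; _detect_with_east then rescales the boxes to the
    # original image via ratio_w/ratio_h.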

    def _detect_with_opencv(self, image: np.ndarray,
                            min_confidence: float) -> List[Dict]:
        """OpenCV-based text detection methods"""
        results = []

        try:
            gray = (cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
                    if len(image.shape) == 3 else image)

            # Method 1: MSER (Maximally Stable Extremal Regions)
            # Note: recent opencv-python builds use these parameter names;
            # older 3.x builds used underscore-prefixed names (_delta, ...).
            mser = cv2.MSER_create(
                delta=2,
                min_area=30,
                max_area=8000,
                max_variation=0.25,
                min_diversity=0.2,
                max_evolution=200,
                area_threshold=1.01,
                min_margin=0.003,
                edge_blur_size=5
            )

            regions, _ = mser.detectRegions(gray)
            for region in regions:
                if len(region) > 10:
                    x, y, w, h = cv2.boundingRect(region)
                    aspect_ratio = w / h if h > 0 else 0
                    area = w * h

                    if (0.1 < aspect_ratio < 20 and 100 < area < 10000 and
                            w > 15 and h > 8):
                        results.append({
                            'bbox': (x, y, x + w, y + h),
                            'confidence': 0.6,
                            'method': 'mser',
                            'text': '',
                            'polygon': [(x, y), (x + w, y), (x + w, y + h), (x, y + h)]
                        })

            # Method 2: Contour-based detection
            # Apply multiple preprocessing techniques
            preprocessed = [
                cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                      cv2.THRESH_BINARY_INV, 11, 2),
                cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                                      cv2.THRESH_BINARY_INV, 15, 4),
                cv2.threshold(gray, 0, 255,
                              cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
            ]

            for thresh in preprocessed:
                # Morphological closing to join nearby strokes
                kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
                processed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)

                contours, _ = cv2.findContours(processed, cv2.RETR_EXTERNAL,
                                               cv2.CHAIN_APPROX_SIMPLE)

                for contour in contours:
                    area = cv2.contourArea(contour)
                    if 50 < area < 5000:
                        x, y, w, h = cv2.boundingRect(contour)
                        aspect_ratio = w / h if h > 0 else 0

                        if 0.2 < aspect_ratio < 15 and w > 10 and h > 8:
                            results.append({
                                'bbox': (x, y, x + w, y + h),
                                'confidence': 0.5,
                                'method': 'contour',
                                'text': '',
                                'polygon': [(x, y), (x + w, y), (x + w, y + h), (x, y + h)]
                            })

        except Exception as e:
            print(f"OpenCV detection error: {e}")

        return results
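
    # Note: the three thresholded variants above (Gaussian-adaptive,
    # mean-adaptive, Otsu) deliberately overlap; the duplicate boxes they
    # produce are collapsed later by _merge_detections.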

    def _detect_manga_specific(self, image: np.ndarray,
                               min_confidence: float) -> List[Dict]:
        """Manga/comic specific text detection"""
        results = []

        try:
            gray = (cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
                    if len(image.shape) == 3 else image)

            # Speech bubble detection
            results.extend(self._detect_speech_bubbles(gray))

            # Sound effect detection (often has different characteristics)
            results.extend(self._detect_sound_effects(gray))

            # Handwritten text detection
            results.extend(self._detect_handwritten_text(gray))

        except Exception as e:
            print(f"Manga-specific detection error: {e}")

        return results

    def _detect_speech_bubbles(self, gray: np.ndarray) -> List[Dict]:
        """Detect speech bubbles and text within them"""
        results = []

        try:
            # Use HoughCircles to detect circular/oval speech bubbles
            circles = cv2.HoughCircles(gray, cv2.HOUGH_GRADIENT, 1, 50,
                                       param1=50, param2=30,
                                       minRadius=20, maxRadius=200)
            if circles is not None:
                circles = np.round(circles[0, :]).astype("int")
                for (x, y, r) in circles:
                    # Create a bounding box around the circle, clipped to the image
                    bbox = (max(0, x - r), max(0, y - r),
                            min(gray.shape[1], x + r), min(gray.shape[0], y + r))

                    results.append({
                        'bbox': bbox,
                        'confidence': 0.4,
                        'method': 'speech_bubble',
                        'text': '',
                        'polygon': [(bbox[0], bbox[1]), (bbox[2], bbox[1]),
                                    (bbox[2], bbox[3]), (bbox[0], bbox[3])]
                    })

            # Detect rectangular speech bubbles
            # Apply edge detection
            edges = cv2.Canny(gray, 50, 150, apertureSize=3)
            kernel = np.ones((3, 3), np.uint8)
            edges = cv2.dilate(edges, kernel, iterations=1)

            contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL,
                                           cv2.CHAIN_APPROX_SIMPLE)

            for contour in contours:
                area = cv2.contourArea(contour)
                if 500 < area < 20000:  # Size filter for speech bubbles
                    # Approximate the contour with a polygon
                    epsilon = 0.02 * cv2.arcLength(contour, True)
                    approx = cv2.approxPolyDP(contour, epsilon, True)

                    if len(approx) >= 4:  # Roughly rectangular
                        x, y, w, h = cv2.boundingRect(contour)
                        aspect_ratio = w / h if h > 0 else 0

                        if 0.3 < aspect_ratio < 5:  # Reasonable aspect ratio
                            results.append({
                                'bbox': (x, y, x + w, y + h),
                                'confidence': 0.5,
                                'method': 'rect_bubble',
                                'text': '',
                                'polygon': [(x, y), (x + w, y), (x + w, y + h), (x, y + h)]
                            })

        except Exception as e:
            print(f"Speech bubble detection error: {e}")

        return results

    def _detect_sound_effects(self, gray: np.ndarray) -> List[Dict]:
        """Detect sound effects text (often stylized)"""
        results = []

        try:
            # Sound effects often use bold, stylized lettering
            kernel_large = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 7))

            # Top-hat transform highlights bright text on dark backgrounds
            tophat = cv2.morphologyEx(gray, cv2.MORPH_TOPHAT, kernel_large)

            # Black-hat transform highlights dark text on bright backgrounds
            blackhat = cv2.morphologyEx(gray, cv2.MORPH_BLACKHAT, kernel_large)

            # Combine both responses
            combined = cv2.add(tophat, blackhat)

            # Threshold
            _, thresh = cv2.threshold(combined, 10, 255, cv2.THRESH_BINARY)

            # Find contours
            contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL,
                                           cv2.CHAIN_APPROX_SIMPLE)

            for contour in contours:
                area = cv2.contourArea(contour)
                if 100 < area < 8000:
                    x, y, w, h = cv2.boundingRect(contour)
                    aspect_ratio = w / h if h > 0 else 0

                    # Sound effects can have more varied aspect ratios
                    if 0.1 < aspect_ratio < 20 and w > 20 and h > 15:
                        results.append({
                            'bbox': (x, y, x + w, y + h),
                            'confidence': 0.4,
                            'method': 'sound_effect',
                            'text': '',
                            'polygon': [(x, y), (x + w, y), (x + w, y + h), (x, y + h)]
                        })

        except Exception as e:
            print(f"Sound effect detection error: {e}")

        return results

    def _detect_handwritten_text(self, gray: np.ndarray) -> List[Dict]:
        """Detect handwritten text areas"""
        results = []

        try:
            # Handwritten text often has more irregular stroke patterns;
            # use gradient-magnitude-based detection
            grad_x = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
            grad_y = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)

            magnitude = np.sqrt(grad_x**2 + grad_y**2)
            # Normalize to 8-bit (guard against an all-black image)
            magnitude = np.uint8(magnitude / max(magnitude.max(), 1e-6) * 255)

            # Apply threshold
            _, thresh = cv2.threshold(magnitude, 30, 255, cv2.THRESH_BINARY)

            # Morphological closing to merge strokes into word blobs
            kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
            thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)

            contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL,
                                           cv2.CHAIN_APPROX_SIMPLE)

            for contour in contours:
                area = cv2.contourArea(contour)
                if 200 < area < 5000:
                    x, y, w, h = cv2.boundingRect(contour)
                    aspect_ratio = w / h if h > 0 else 0

                    if 0.3 < aspect_ratio < 8 and w > 25 and h > 15:
                        results.append({
                            'bbox': (x, y, x + w, y + h),
                            'confidence': 0.35,
                            'method': 'handwritten',
                            'text': '',
                            'polygon': [(x, y), (x + w, y), (x + w, y + h), (x, y + h)]
                        })

        except Exception as e:
            print(f"Handwritten text detection error: {e}")

        return results

    def _merge_detections(self, detections: List[Dict]) -> List[Dict]:
        """Merge overlapping detections from different methods"""
        if not detections:
            return []

        # Sort by confidence, highest first
        detections.sort(key=lambda x: x['confidence'], reverse=True)

        merged = []
        used = set()

        for i, detection in enumerate(detections):
            if i in used:
                continue

            current = detection.copy()
            current_bbox = detection['bbox']

            # Find overlapping detections
            overlaps = []
            for j, other in enumerate(detections[i+1:], i+1):
                if j in used:
                    continue

                iou = self._calculate_iou(current_bbox, other['bbox'])
                if iou > 0.3:  # Overlap threshold
                    overlaps.append(j)

            # Merge overlapping detections
            if overlaps:
                all_bboxes = [current_bbox] + [detections[j]['bbox']
                                               for j in overlaps]
                merged_bbox = self._merge_bboxes(all_bboxes)
                current['bbox'] = merged_bbox
                # Update polygon
                x1, y1, x2, y2 = merged_bbox
                current['polygon'] = [(x1, y1), (x2, y1), (x2, y2), (x1, y2)]

                # Combine method names
                methods = [current['method']] + [detections[j]['method']
                                                 for j in overlaps]
                current['method'] = '+'.join(set(methods))

                # Keep the highest confidence
                confidences = [current['confidence']] + [detections[j]['confidence']
                                                         for j in overlaps]
                current['confidence'] = max(confidences)

                # Mark merged detections as used
                used.update(overlaps)

            merged.append(current)
            used.add(i)

        return merged

    def _calculate_iou(self, bbox1: Tuple, bbox2: Tuple) -> float:
        """Calculate Intersection over Union of two bounding boxes"""
        x1_1, y1_1, x2_1, y2_1 = bbox1
        x1_2, y1_2, x2_2, y2_2 = bbox2

        # Calculate intersection rectangle
        x1_int = max(x1_1, x1_2)
        y1_int = max(y1_1, y1_2)
        x2_int = min(x2_1, x2_2)
        y2_int = min(y2_1, y2_2)

        if x2_int <= x1_int or y2_int <= y1_int:
            return 0.0

        intersection = (x2_int - x1_int) * (y2_int - y1_int)

        # Calculate union
        area1 = (x2_1 - x1_1) * (y2_1 - y1_1)
        area2 = (x2_2 - x1_2) * (y2_2 - y1_2)
        union = area1 + area2 - intersection

        return intersection / union if union > 0 else 0.0
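
    # Worked example: bbox1 = (0, 0, 10, 10) and bbox2 = (5, 5, 15, 15)
    # intersect over the square (5, 5)-(10, 10), area 25; the union is
    # 100 + 100 - 25 = 175, so IoU = 25/175 ≈ 0.143, which falls below the
    # 0.3 merge threshold used in _merge_detections.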

    def _merge_bboxes(self, bboxes: List[Tuple]) -> Tuple:
        """Merge multiple bounding boxes into one enclosing box"""
        x1_min = min(bbox[0] for bbox in bboxes)
        y1_min = min(bbox[1] for bbox in bboxes)
        x2_max = max(bbox[2] for bbox in bboxes)
        y2_max = max(bbox[3] for bbox in bboxes)

        return (x1_min, y1_min, x2_max, y2_max)
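
# Bridging detection and inpainting (a sketch, not part of the original
# script): the inpainters below operate on a binary mask in which text
# pixels are white. One way to rasterize the merged detections, with a
# small padding margin, assuming `detections` comes from
# AdvancedTextDetector.detect_text_comprehensive():
def detections_to_mask(image: np.ndarray, detections: List[Dict],
                       padding: int = 4) -> np.ndarray:
    """Rasterize detection bboxes into a single-channel inpainting mask."""
    mask = np.zeros(image.shape[:2], dtype=np.uint8)
    for det in detections:
        x1, y1, x2, y2 = det['bbox']
        cv2.rectangle(
            mask,
            (max(0, int(x1) - padding), max(0, int(y1) - padding)),
            (min(image.shape[1] - 1, int(x2) + padding),
             min(image.shape[0] - 1, int(y2) + padding)),
            255, thickness=-1
        )
    return mask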

# ======================= ADVANCED INPAINTING =======================

class AdvancedInpainter:
    """Multi-method inpainting with quality optimization"""

    def __init__(self):
        # Device must be set before loading models that are moved to it
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.setup_inpainting_models()

    def setup_inpainting_models(self):
        """Setup all available inpainting methods"""
        print("🔧 Setting up inpainting models...")

        self.inpainters = {}

        # Stable Diffusion inpainting
        try:
            from diffusers import StableDiffusionInpaintPipeline

            model_id = "runwayml/stable-diffusion-inpainting"
            self.inpainters['sd'] = StableDiffusionInpaintPipeline.from_pretrained(
                model_id,
                torch_dtype=torch.float16 if torch.cuda.is_available()
                else torch.float32,
                variant="fp16" if torch.cuda.is_available() else None,
                use_safetensors=True
            ).to(self.device)

            # Enable memory optimizations
            if torch.cuda.is_available():
                self.inpainters['sd'].enable_attention_slicing()
                self.inpainters['sd'].enable_model_cpu_offload()
                try:
                    self.inpainters['sd'].enable_xformers_memory_efficient_attention()
                except Exception:
                    pass

            print("✅ Stable Diffusion inpainting loaded")

        except Exception as e:
            print(f"⚠️ Stable Diffusion loading failed: {e}")

        # MAT (Mask-Aware Transformer), if available
        try:
            self.setup_mat_inpainter()
        except Exception:
            print("⚠️ MAT inpainter not available")

        # LaMa (Large Mask Inpainting), if available
        try:
            self.setup_lama_inpainter()
        except Exception:
            print("⚠️ LaMa inpainter not available")

        print(f"✅ Inpainting setup complete! Available methods: {list(self.inpainters.keys())}")
