# Complete Manga/Comic Text Removal Project
# Advanced solution with multiple detection methods and inpainting techniques
# Fully compatible with Google Colab
import os
import sys
import cv2
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw, ImageFilter, ImageEnhance
import torch
import torchvision.transforms as transforms
from pathlib import Path
import requests
import zipfile
import gdown
from typing import List, Tuple, Optional, Dict
import warnings
import json
import time
from tqdm import tqdm
import gc
warnings.filterwarnings('ignore')
# ======================= INSTALLATION SETUP =======================
def install_all_dependencies():
"""Complete dependency installation for Google Colab"""
print("Installing all required packages... This may take a few minutes.")
# Core packages
packages = [
"torch torchvision torchaudio --index-url
https://download.pytorch.org/whl/cu118",
"opencv-python-headless",
"pillow>=9.0.0",
"numpy>=1.21.0",
"matplotlib>=3.5.0",
"tqdm",
"scipy",
"scikit-image",
"scikit-learn"
]
# OCR packages
ocr_packages = [
"easyocr",
"paddlepaddle-gpu" if torch.cuda.is_available() else "paddlepaddle",
"paddleocr>=2.6.0"
]
# AI/ML packages
ai_packages = [
"transformers>=4.20.0",
"diffusers>=0.21.0",
"accelerate>=0.20.0",
"controlnet-aux",
"xformers" if torch.cuda.is_available() else "",
"segment-anything",
"ultralytics>=8.0.0"
]
# Additional utilities
util_packages = [
"imageio",
"imageio-ffmpeg",
"gradio",
"ipywidgets"
]
all_packages = packages + ocr_packages + ai_packages + util_packages
for package in all_packages:
if package: # Skip empty strings
try:
print(f"Installing {package}...")
os.system(f"pip install -q {package}")
except Exception as e:
print(f"Warning: Could not install {package}: {e}")
# Additional setup for specific packages
try:
import nltk
nltk.download('punkt', quiet=True)
except Exception:
pass
print("✅ All dependencies installed successfully!")
# ======================= ADVANCED TEXT DETECTION =======================
class AdvancedTextDetector:
"""Multi-method text detection with manga/comic specialization"""
def __init__(self):
self.setup_all_detectors()
self.detection_cache = {}
def setup_all_detectors(self):
"""Initialize all available text detection methods"""
print("🔧 Setting up text detection models...")
# OCR Readers
self.detectors = {}
# EasyOCR setup
try:
import easyocr
self.detectors['easyocr'] = easyocr.Reader(
['en', 'ja', 'ko', 'zh', 'th', 'vi'],
gpu=torch.cuda.is_available()
)
print("✅ EasyOCR initialized")
except Exception as e:
print(f"⚠️ EasyOCR failed: {e}")
# PaddleOCR setup
try:
from paddleocr import PaddleOCR
self.detectors['paddle_en'] = PaddleOCR(
use_angle_cls=True,
lang='en',
show_log=False,
use_gpu=torch.cuda.is_available()
)
self.detectors['paddle_ch'] = PaddleOCR(
use_angle_cls=True,
lang='ch',
show_log=False,
use_gpu=torch.cuda.is_available()
)
print("✅ PaddleOCR initialized")
except Exception as e:
print(f"⚠️ PaddleOCR failed: {e}")
# CRAFT Text Detection (if available)
try:
self.setup_craft_detector()
except Exception:
print("⚠️ CRAFT detector not available")
# OpenCV-based detectors
self.setup_opencv_detectors()
print(f"✅ Text detection setup complete! Available methods:
{list(self.detectors.keys())}")
def setup_craft_detector(self):
"""Setup CRAFT text detector for better comic text detection"""
try:
# Download CRAFT model if not exists
craft_path = "/content/craft_mlt_25k.pth"
if not os.path.exists(craft_path):
print("Downloading CRAFT model...")
url = "https://github.com/clovaai/CRAFT-pytorch/releases/download/v1.0/craft_mlt_25k.pth"
os.system(f"wget -q {url} -O {craft_path}")
# Note: Full CRAFT implementation would go here
# For now, we'll use a placeholder
self.detectors['craft'] = None
except Exception as e:
print(f"CRAFT setup failed: {e}")
def setup_opencv_detectors(self):
"""Setup OpenCV-based text detection methods"""
# EAST Text Detector
try:
east_path = "/content/frozen_east_text_detection.pb"
if not os.path.exists(east_path):
print("Downloading EAST model...")
url = "https://github.com/opencv/opencv_extra/raw/master/testdata/dnn/frozen_east_text_detection.pb"
os.system(f"wget -q {url} -O {east_path}")
self.detectors['east'] = cv2.dnn.readNet(east_path)
print("✅ EAST detector initialized")
except Exception as e:
print(f"⚠️ EAST detector failed: {e}")
def detect_text_comprehensive(self, image: np.ndarray, min_confidence: float = 0.3) -> List[Dict]:
"""
Comprehensive text detection using all available methods
Returns:
List of detection dictionaries with bbox, confidence, method, text
"""
results = []
# Method 1: EasyOCR
if 'easyocr' in self.detectors:
results.extend(self._detect_with_easyocr(image, min_confidence))
# Method 2: PaddleOCR
if 'paddle_en' in self.detectors:
results.extend(self._detect_with_paddle(image, min_confidence))
# Method 3: EAST
if 'east' in self.detectors:
results.extend(self._detect_with_east(image, min_confidence))
# Method 4: OpenCV methods
results.extend(self._detect_with_opencv(image, min_confidence))
# Method 5: Manga-specific detection
results.extend(self._detect_manga_specific(image, min_confidence))
# Merge and filter results
merged_results = self._merge_detections(results)
return merged_results
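# Example usage (a minimal sketch; "page.png" is a placeholder path):
#
#   detector = AdvancedTextDetector()
#   img = cv2.imread("page.png")
#   detections = detector.detect_text_comprehensive(img, min_confidence=0.3)
#   for det in detections:
#       x1, y1, x2, y2 = det['bbox']
#       cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255), 2)
#       print(det['method'], det['confidence'], det['text'])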
def _detect_with_easyocr(self, image: np.ndarray, min_confidence: float) -> List[Dict]:
"""EasyOCR detection"""
results = []
try:
detections = self.detectors['easyocr'].readtext(image)
for bbox, text, confidence in detections:
if confidence >= min_confidence:
bbox_array = np.array(bbox, dtype=np.int32)
x_min, y_min = np.min(bbox_array, axis=0)
x_max, y_max = np.max(bbox_array, axis=0)
results.append({
'bbox': (x_min, y_min, x_max, y_max),
'confidence': confidence,
'method': 'easyocr',
'text': text,
'polygon': bbox
})
except Exception as e:
print(f"EasyOCR detection error: {e}")
return results
def _detect_with_paddle(self, image: np.ndarray, min_confidence: float) -> List[Dict]:
"""PaddleOCR detection"""
results = []
for lang in ['paddle_en', 'paddle_ch']:
if lang not in self.detectors:
continue
try:
ocr_results = self.detectors[lang].ocr(image, cls=True)
if ocr_results and ocr_results[0]:
for item in ocr_results[0]:
bbox, (text, confidence) = item
if confidence >= min_confidence:
bbox_array = np.array(bbox, dtype=np.int32)
x_min, y_min = np.min(bbox_array, axis=0)
x_max, y_max = np.max(bbox_array, axis=0)
results.append({
'bbox': (x_min, y_min, x_max, y_max),
'confidence': confidence,
'method': lang,
'text': text,
'polygon': bbox
})
except Exception as e:
print(f"{lang} detection error: {e}")
return results
def _detect_with_east(self, image: np.ndarray, min_confidence: float) -> List[Dict]:
"""EAST detector"""
results = []
try:
if 'east' not in self.detectors:
return results
net = self.detectors['east']
height, width = image.shape[:2]
# Prepare image for EAST
new_height, new_width = 320, 320
ratio_h, ratio_w = height / new_height, width / new_width
blob = cv2.dnn.blobFromImage(image, 1.0, (new_width, new_height),
(123.68, 116.78, 103.94), swapRB=True,
crop=False)
net.setInput(blob)
scores, geometry = net.forward(['feature_fusion/Conv_7/Sigmoid',
'feature_fusion/concat_3'])
# Decode predictions
boxes, confidences = self._decode_east_predictions(scores, geometry,
min_confidence)
# Apply NMS
indices = cv2.dnn.NMSBoxes(boxes, confidences, min_confidence, 0.4)
if len(indices) > 0:
for i in indices.flatten():
x, y, w, h = boxes[i]
# Scale back to original image
x = int(x * ratio_w)
y = int(y * ratio_h)
w = int(w * ratio_w)
h = int(h * ratio_h)
results.append({
'bbox': (x, y, x + w, y + h),
'confidence': confidences[i],
'method': 'east',
'text': '',
'polygon': [(x, y), (x + w, y), (x + w, y + h), (x, y + h)]
})
except Exception as e:
print(f"EAST detection error: {e}")
return results
def _decode_east_predictions(self, scores, geometry, min_confidence):
"""Decode EAST model predictions"""
boxes = []
confidences = []
height, width = scores.shape[2:4]
for y in range(height):
scores_data = scores[0, 0, y]
x_data0 = geometry[0, 0, y]
x_data1 = geometry[0, 1, y]
x_data2 = geometry[0, 2, y]
x_data3 = geometry[0, 3, y]
angles_data = geometry[0, 4, y]
for x in range(width):
if scores_data[x] < min_confidence:
continue
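# EAST score/geometry maps are at 1/4 of the input resolution, so
# feature-map coordinates map back to image pixels with a 4x stride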
offset_x, offset_y = x * 4.0, y * 4.0
angle = angles_data[x]
cos = np.cos(angle)
sin = np.sin(angle)
h = x_data0[x] + x_data2[x]
w = x_data1[x] + x_data3[x]
end_x = int(offset_x + (cos * x_data1[x]) + (sin * x_data2[x]))
end_y = int(offset_y - (sin * x_data1[x]) + (cos * x_data2[x]))
start_x = int(end_x - w)
start_y = int(end_y - h)
boxes.append([start_x, start_y, int(w), int(h)])
confidences.append(float(scores_data[x]))
return boxes, confidences
def _detect_with_opencv(self, image: np.ndarray, min_confidence: float) -> List[Dict]:
"""OpenCV-based text detection methods"""
results = []
try:
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if len(image.shape) == 3 else image
# Method 1: MSER (Maximally Stable Extremal Regions)
# Recent opencv-python releases dropped the underscore-prefixed keyword
# names (e.g. _delta), so the plain names are used here
mser = cv2.MSER_create(
delta=2,
min_area=30,
max_area=8000,
max_variation=0.25,
min_diversity=0.2,
max_evolution=200,
area_threshold=1.01,
min_margin=0.003,
edge_blur_size=5
)
regions, _ = mser.detectRegions(gray)
for region in regions:
if len(region) > 10:
x, y, w, h = cv2.boundingRect(region)
aspect_ratio = w / h if h > 0 else 0
area = w * h
if (0.1 < aspect_ratio < 20 and 100 < area < 10000 and
w > 15 and h > 8):
results.append({
'bbox': (x, y, x + w, y + h),
'confidence': 0.6,
'method': 'mser',
'text': '',
'polygon': [(x, y), (x + w, y), (x + w, y + h), (x, y + h)]
})
# Method 2: Contour-based detection
# Apply multiple preprocessing techniques
preprocessed = [
cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
cv2.THRESH_BINARY_INV, 11, 2),
cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
cv2.THRESH_BINARY_INV, 15, 4),
cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV +
cv2.THRESH_OTSU)[1]
]
for thresh in preprocessed:
# Morphological operations
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
processed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
contours, _ = cv2.findContours(processed, cv2.RETR_EXTERNAL,
cv2.CHAIN_APPROX_SIMPLE)
for contour in contours:
area = cv2.contourArea(contour)
if 50 < area < 5000:
x, y, w, h = cv2.boundingRect(contour)
aspect_ratio = w / h if h > 0 else 0
if 0.2 < aspect_ratio < 15 and w > 10 and h > 8:
results.append({
'bbox': (x, y, x + w, y + h),
'confidence': 0.5,
'method': 'contour',
'text': '',
'polygon': [(x, y), (x + w, y), (x + w, y + h), (x, y + h)]
})
except Exception as e:
print(f"OpenCV detection error: {e}")
return results
def _detect_manga_specific(self, image: np.ndarray, min_confidence: float) -> List[Dict]:
"""Manga/comic specific text detection"""
results = []
try:
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if len(image.shape) == 3 else image
# Speech bubble detection
results.extend(self._detect_speech_bubbles(gray))
# Sound effect detection (often has different characteristics)
results.extend(self._detect_sound_effects(gray))
# Handwritten text detection
results.extend(self._detect_handwritten_text(gray))
except Exception as e:
print(f"Manga-specific detection error: {e}")
return results
def _detect_speech_bubbles(self, gray: np.ndarray) -> List[Dict]:
"""Detect speech bubbles and text within them"""
results = []
try:
# Use HoughCircles to detect circular/oval speech bubbles
circles = cv2.HoughCircles(gray, cv2.HOUGH_GRADIENT, 1, 50,
param1=50, param2=30, minRadius=20,
maxRadius=200)
if circles is not None:
circles = np.round(circles[0, :]).astype("int")
for (x, y, r) in circles:
# Create bounding box around circle
bbox = (max(0, x - r), max(0, y - r),
min(gray.shape[1], x + r), min(gray.shape[0], y + r))
results.append({
'bbox': bbox,
'confidence': 0.4,
'method': 'speech_bubble',
'text': '',
'polygon': [(bbox[0], bbox[1]), (bbox[2], bbox[1]),
(bbox[2], bbox[3]), (bbox[0], bbox[3])]
})
# Detect rectangular speech bubbles
# Apply edge detection
edges = cv2.Canny(gray, 50, 150, apertureSize=3)
kernel = np.ones((3, 3), np.uint8)
edges = cv2.dilate(edges, kernel, iterations=1)
contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL,
cv2.CHAIN_APPROX_SIMPLE)
for contour in contours:
area = cv2.contourArea(contour)
if 500 < area < 20000: # Size filter for speech bubbles
# Approximate contour
epsilon = 0.02 * cv2.arcLength(contour, True)
approx = cv2.approxPolyDP(contour, epsilon, True)
if len(approx) >= 4: # Roughly rectangular
x, y, w, h = cv2.boundingRect(contour)
aspect_ratio = w / h if h > 0 else 0
if 0.3 < aspect_ratio < 5: # Reasonable aspect ratio
results.append({
'bbox': (x, y, x + w, y + h),
'confidence': 0.5,
'method': 'rect_bubble',
'text': '',
'polygon': [(x, y), (x + w, y), (x + w, y + h), (x, y + h)]
})
except Exception as e:
print(f"Speech bubble detection error: {e}")
return results
def _detect_sound_effects(self, gray: np.ndarray) -> List[Dict]:
"""Detect sound effects text (often stylized)"""
results = []
try:
# Sound effects often have bold, stylized text
# Use different morphological operations
kernel_large = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 7))
kernel_small = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
# Apply tophat transform to detect bright text on dark background
tophat = cv2.morphologyEx(gray, cv2.MORPH_TOPHAT, kernel_large)
# Apply blackhat transform to detect dark text on bright background
blackhat = cv2.morphologyEx(gray, cv2.MORPH_BLACKHAT, kernel_large)
# Combine both
combined = cv2.add(tophat, blackhat)
# Threshold
_, thresh = cv2.threshold(combined, 10, 255, cv2.THRESH_BINARY)
# Find contours
contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL,
cv2.CHAIN_APPROX_SIMPLE)
for contour in contours:
area = cv2.contourArea(contour)
if 100 < area < 8000:
x, y, w, h = cv2.boundingRect(contour)
aspect_ratio = w / h if h > 0 else 0
# Sound effects can have more varied aspect ratios
if 0.1 < aspect_ratio < 20 and w > 20 and h > 15:
results.append({
'bbox': (x, y, x + w, y + h),
'confidence': 0.4,
'method': 'sound_effect',
'text': '',
'polygon': [(x, y), (x + w, y), (x + w, y + h), (x, y + h)]
})
except Exception as e:
print(f"Sound effect detection error: {e}")
return results
def _detect_handwritten_text(self, gray: np.ndarray) -> List[Dict]:
"""Detect handwritten text areas"""
results = []
try:
# Handwritten text often has more irregular patterns
# Use gradient-based detection
grad_x = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
grad_y = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
magnitude = np.sqrt(grad_x**2 + grad_y**2)
# Guard against division by zero on featureless (blank) regions
magnitude = np.uint8(magnitude / max(magnitude.max(), 1e-6) * 255)
# Apply threshold
_, thresh = cv2.threshold(magnitude, 30, 255, cv2.THRESH_BINARY)
# Morphological operations
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL,
cv2.CHAIN_APPROX_SIMPLE)
for contour in contours:
area = cv2.contourArea(contour)
if 200 < area < 5000:
x, y, w, h = cv2.boundingRect(contour)
aspect_ratio = w / h if h > 0 else 0
if 0.3 < aspect_ratio < 8 and w > 25 and h > 15:
results.append({
'bbox': (x, y, x + w, y + h),
'confidence': 0.35,
'method': 'handwritten',
'text': '',
'polygon': [(x, y), (x + w, y), (x + w, y + h), (x, y + h)]
})
except Exception as e:
print(f"Handwritten text detection error: {e}")
return results
def _merge_detections(self, detections: List[Dict]) -> List[Dict]:
"""Merge overlapping detections from different methods"""
if not detections:
return []
# Sort by confidence
detections.sort(key=lambda x: x['confidence'], reverse=True)
merged = []
used = set()
for i, detection in enumerate(detections):
if i in used:
continue
current = detection.copy()
current_bbox = detection['bbox']
# Find overlapping detections
overlaps = []
for j, other in enumerate(detections[i+1:], i+1):
if j in used:
continue
iou = self._calculate_iou(current_bbox, other['bbox'])
if iou > 0.3: # Overlap threshold
overlaps.append(j)
# Merge overlapping detections
if overlaps:
all_bboxes = [current_bbox] + [detections[j]['bbox'] for j in overlaps]
merged_bbox = self._merge_bboxes(all_bboxes)
current['bbox'] = merged_bbox
# Update polygon
x1, y1, x2, y2 = merged_bbox
current['polygon'] = [(x1, y1), (x2, y1), (x2, y2), (x1, y2)]
# Combine methods
methods = [current['method']] + [detections[j]['method'] for j in overlaps]
current['method'] = '+'.join(set(methods))
# Use highest confidence
confidences = [current['confidence']] + [detections[j]['confidence'] for j in overlaps]
current['confidence'] = max(confidences)
# Mark as used
used.update(overlaps)
merged.append(current)
used.add(i)
return merged
def _calculate_iou(self, bbox1: Tuple, bbox2: Tuple) -> float:
"""Calculate Intersection over Union of two bounding boxes"""
x1_1, y1_1, x2_1, y2_1 = bbox1
x1_2, y1_2, x2_2, y2_2 = bbox2
# Calculate intersection
x1_int = max(x1_1, x1_2)
y1_int = max(y1_1, y1_2)
x2_int = min(x2_1, x2_2)
y2_int = min(y2_1, y2_2)
if x2_int <= x1_int or y2_int <= y1_int:
return 0.0
intersection = (x2_int - x1_int) * (y2_int - y1_int)
# Calculate union
area1 = (x2_1 - x1_1) * (y2_1 - y1_1)
area2 = (x2_2 - x1_2) * (y2_2 - y1_2)
union = area1 + area2 - intersection
return intersection / union if union > 0 else 0.0
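# Worked example: for bbox1 = (0, 0, 10, 10) and bbox2 = (5, 5, 15, 15),
# the intersection is 5 * 5 = 25 and the union is 100 + 100 - 25 = 175,
# giving IoU = 25 / 175 ≈ 0.143, below the 0.3 merge threshold used above,
# so the two boxes would remain separate detections.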
def _merge_bboxes(self, bboxes: List[Tuple]) -> Tuple:
"""Merge multiple bounding boxes into one"""
x1_min = min(bbox[0] for bbox in bboxes)
y1_min = min(bbox[1] for bbox in bboxes)
x2_max = max(bbox[2] for bbox in bboxes)
y2_max = max(bbox[3] for bbox in bboxes)
return (x1_min, y1_min, x2_max, y2_max)
# ======================= ADVANCED INPAINTING =======================
class AdvancedInpainter:
"""Multi-method inpainting with quality optimization"""
def __init__(self):
# Set the device before loading models, since setup_inpainting_models()
# moves pipelines onto self.device
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
self.setup_inpainting_models()
def setup_inpainting_models(self):
"""Setup all available inpainting methods"""
print("🔧 Setting up inpainting models...")
self.inpainters = {}
# Stable Diffusion Inpainting
try:
from diffusers import StableDiffusionInpaintPipeline, DiffusionPipeline
model_id = "runwayml/stable-diffusion-inpainting"
self.inpainters['sd'] = StableDiffusionInpaintPipeline.from_pretrained(
model_id,
torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
variant="fp16" if torch.cuda.is_available() else None,
use_safetensors=True
).to(self.device)
# Enable memory optimizations. Note: enable_model_cpu_offload() manages
# device placement itself and would conflict with the .to(self.device)
# call above, so only attention slicing is enabled here.
if torch.cuda.is_available():
self.inpainters['sd'].enable_attention_slicing()
try:
self.inpainters['sd'].enable_xformers_memory_efficient_attention()
except Exception:
pass
print("✅ Stable Diffusion inpainting loaded")
except Exception as e:
print(f"⚠️ Stable Diffusion loading failed: {e}")
# MAT (Mask-Aware Transformer) - if available
try:
self.setup_mat_inpainter()
except Exception:
print("⚠️ MAT inpainter not available")
# LaMa (Large Mask Inpainting) - if available
try:
self.setup_lama_inpainter()
except Exception:
print("⚠️ LaMa inpainter not available")
print(f"✅