Parallel processing

This commit is contained in:
Jacob Zelek
2020-11-02 22:02:20 -08:00
parent e9dd9ab799
commit 4f635b68cc
2 changed files with 34 additions and 18 deletions

View File

@@ -45,6 +45,7 @@ Options:
<height> Height of each thumbnail. <height> Height of each thumbnail.
<columns> Total number of thumbnails per line. <columns> Total number of thumbnails per line.
<output> Output. <output> Output.
<parallelism> Number of files to process in parallel.
``` ```
## Example ## Example

View File

@@ -16,6 +16,7 @@ Options:
<height> Height of each thumbnail. <height> Height of each thumbnail.
<columns> Total number of thumbnails per line. <columns> Total number of thumbnails per line.
<output> Output. <output> Output.
<parallelism> Number of files to process in parallel.
""" """
from docopt import docopt from docopt import docopt
@@ -23,6 +24,7 @@ from moviepy.editor import VideoFileClip
from PIL import Image from PIL import Image
from click import progressbar from click import progressbar
from collections import namedtuple from collections import namedtuple
from multiprocessing import Pool, cpu_count
import glob import glob
import os import os
import random import random
@@ -39,11 +41,11 @@ def generate_video_thumbnails(args):
input_path = args['<video>'] input_path = args['<video>']
interval = int(args['<interval>']) interval = int(args['<interval>'])
size = (int(args['<width>']), int(args['<height>'])) size = (int(args['<width>']), int(args['<height>']))
output_prefix = get_output_prefix()
columns = int(args['<columns>']) columns = int(args['<columns>'])
output_path = args['<output>'] output_path = args['<output>']
parallelism = args.get('<parallelism>', cpu_count()*2-1)
file_paths = set() work_units = []
if os.path.isdir(input_path): if os.path.isdir(input_path):
# Ensure output path is also directory # Ensure output path is also directory
@@ -70,26 +72,38 @@ def generate_video_thumbnails(args):
seperator=os.sep, seperator=os.sep,
file_name=os.path.basename(file_path) file_name=os.path.basename(file_path)
) )
file_paths.add((file_path, single_output_path,)) work_units.append((file_path, single_output_path,
interval, size, columns,))
else: else:
file_paths.add((input_path, output_path,)) work_units.append((input_path, output_path, interval, size, columns,))
# Process all files sequentially # Limit the number of parallel jobs if lower number of files
for file_path in file_paths: parallelism = min(parallelism, len(work_units))
video_file_clip = VideoFileClip(file_path[0])
generate_frames(video_file_clip, interval, output_prefix, size) # Process all files in parallel
generate_sprite_from_frames(output_prefix, columns, size, file_path[1]) with Pool(parallelism) as p:
p.map(process_file, work_units)
def generate_frames(video_file_clip, interval, output_prefix, size): def process_file(work_unit):
input_file, output_file, interval, size, columns = work_unit
video_file_clip = VideoFileClip(input_file)
output_prefix = get_output_prefix()
file_name = os.path.basename(input_file)
generate_frames(file_name, video_file_clip, interval, output_prefix, size)
generate_sprite_from_frames(output_prefix, columns, size, output_file)
def generate_frames(file_name, video_file_clip, interval, output_prefix, size):
duration = video_file_clip.duration duration = video_file_clip.duration
print("Extracting", int(duration / interval), "frames")
frame_count = 0 frame_count = 0
with progressbar(range(0, int(duration), interval)) as items: total_frames = int(duration / interval)
for i in items: for i in range(0, int(duration), interval):
extract_frame(video_file_clip, i, output_prefix, size, frame_count) print("[{file_name}] Extracting frame {current}/{total}".
frame_count += 1 format(file_name=file_name, current=frame_count+1,
print("Frames extracted.") total=total_frames+1))
extract_frame(video_file_clip, i, output_prefix, size, frame_count)
frame_count += 1
def extract_frame(video_file_clip, moment, output_prefix, size, frame_count): def extract_frame(video_file_clip, moment, output_prefix, size, frame_count):
@@ -138,8 +152,9 @@ def generate_sprite_from_frames(frames_path, columns, size, output):
column = 0 column = 0
final_image.save(output) final_image.save(output)
shutil.rmtree(TMP_FRAMES_PATH, ignore_errors=True) shutil.rmtree(frames_path, ignore_errors=True)
print("Saved!") output_file = os.path.basename(output)
print("[{output_file}] Saved".format(output_file=output_file))
def get_output_prefix(): def get_output_prefix():