Modify yuvscaler to scale to multiple processes by forking on each frame and ensuring the scaled frames are written back in the correct order, supporting up to 16 processes. Signed-off-by: Con Kolivas --- yuvscaler.c | 144 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 135 insertions(+), 9 deletions(-) Index: yuvscaler/yuvscaler.c =================================================================== --- yuvscaler.orig/yuvscaler.c 2007-07-17 11:37:31.000000000 +1000 +++ yuvscaler/yuvscaler.c 2007-07-17 23:22:17.000000000 +1000 @@ -54,6 +54,7 @@ #endif #include +#include #include #include #include @@ -74,6 +75,7 @@ // For pointer address alignement #define ALIGNEMENT 16 // 16 bytes alignement for mmx registers in SIMD instructions for Pentium #define MAXWIDTHNEIGHBORS 16 +#define MAX_PROCS 16 float PI = 3.141592654; @@ -144,6 +146,7 @@ int infile = 0; // =0 for stdin (defau int algorithm = -1; // =0 for resample, and =1 for bicubic unsigned int specific = 0; // is >0 if a specific downscaling speed enhanced treatment of data is possible unsigned int mono = 0; // is =1 for monochrome output +int m_procs = 1; // Number of processes to fork for SMP hardware // Keywords for argument passing const char VCD_KEYWORD[] = "VCD"; @@ -203,7 +206,7 @@ void yuvscaler_print_usage (char *argv[]) { fprintf (stderr, - "usage: yuvscaler -I [input_keyword] -M [mode_keyword] -O [output_keyword] [-S 0|1] [-n p|s|n] [-v 0-2] [-h]\n" + "usage: yuvscaler -I [input_keyword] -M [mode_keyword] -O [output_keyword] [-S 0|1] [-n p|s|n] [-m N] [-v 0-2] [-h]\n" "yuvscaler UPscales or DOWNscales arbitrary-sized YUV frames coming from stdin (in YUV4MPEG 4:2:2 format)\n" "to a specified YUV frame sizes to stdout. 
Please use yuvcorrect for interlacing or color corrections\n" "\n" @@ -247,6 +250,7 @@ yuvscaler_print_usage (char *argv[]) "\t LOVCDSTILL to generate LOw-resolution VCD still images, not-interlaced/progressive frames, size 352x(PAL-288,NTSC-240)\n" "\t SIZE_WidthxHeight to generate frames of size WidthxHeight on output (multiple of 2, Height of 4 if interlaced)\n" "\n" + "-m Specify the number of processes to run concurrently as N (set to number of cpus would be optimal, default 1)\n" "-n (usually not necessary) if norm could not be determined from data flux, specifies the OUTPUT norm for VCD/SVCD p=pal,s=secam,n=ntsc\n" "-v Specifies the degree of verbosity: 0=quiet, 1=normal, 2=verbose/debug\n" "-h : print this lot!\n"); @@ -341,7 +345,8 @@ yuvscaler_print_information (y4m_stream_ output_skip_col_left, output_skip_col_right); } mjpeg_info ("frame rate: %.3f fps", Y4M_RATIO_DBL (frame_rate)); - + if (m_procs > 1) + mjpeg_info ("forking %d processes", m_procs); } // ************************************************************************************* @@ -524,6 +529,13 @@ handle_args_global (int argc, char *argv } break; + case 'm': + m_procs = atoi (optarg); + if (m_procs < 1 || m_procs > MAX_PROCS) + { + mjpeg_error_exit1 ("Processes must be [1..%d]", MAX_PROCS); + } + break; case 'n': // TV norm for SVCD/VCD output switch (*optarg) @@ -1143,6 +1155,77 @@ handle_args_dependent (int argc, char *a } +/* + * Simple silly multiple process forking to scale on multiple cpu hardware, + * implemented by Con Kolivas + */ + +/* Pipes main to/from processes: m2c[i] carries the "ok to write" token to the worker in slot i, c2m[i] carries its "done" token back; [0] is the read end, [1] the write end */ +static int m2c[MAX_PROCS][2], c2m[MAX_PROCS][2]; +/* read() wrapper: retries when interrupted (EINTR), reports any other error via mjpeg_error_exit1; static so no external definition is needed under C99 inline rules */ +static inline ssize_t Read(int fd, void *buf, size_t count) +{ + ssize_t retval; + +retry: + retval = read(fd, buf, count); + if (retval == -1) { + if (errno == EINTR) + goto retry; + mjpeg_error_exit1("read error\n"); + } + return retval; +} +/* write() wrapper: sends count bytes from the caller's buffer, retries on EINTR, reports any other error via mjpeg_error_exit1 */ +static inline ssize_t Write(int fd, const void *buf, size_t count) +{ + ssize_t retval; + +retry: + retval = write(fd, buf, count); 
+ if (retval == -1) { + if (errno == EINTR) + goto retry; + mjpeg_error_exit1("write error\n"); + } + return retval; +} + +/* A pseudo-semaphore for processes using a pipe: wait_on() reads one int token from the pipe, wakeup_with() writes one */ +void wait_on(int pype) +{ + int retval, buf = 0; + + retval = Read(pype, &buf, sizeof(buf)); + if (retval == 0) + mjpeg_error_exit1("read returned 0\n"); +} + +void wakeup_with(int pype) +{ + int retval, buf = 1; + + retval = Write(pype, &buf, sizeof(buf)); + if (retval == 0) + mjpeg_error_exit1("write returned 0\n"); +} + +void init_pipe(int *pype) +{ + if (pipe(pype) == -1) + mjpeg_error_exit1("pipe error"); +} + +void init_pipes(int pypes) +{ + int i; + + for (i = 0; i < pypes; i++) { + init_pipe(&m2c[i][0]); + init_pipe(&c2m[i][0]); + } +} + // ************************************************************************************* // MAIN @@ -1152,6 +1235,8 @@ main (int argc, char *argv[]) { int input_fd = 0; int output_fd = 1; + pid_t child_pid; + int procs = 0, read_proc = 0, write_proc = 0; // DDD and time use // int input_fd = open("./yuvscaler.input",O_RDONLY); @@ -1208,6 +1293,7 @@ main (int argc, char *argv[]) y4m_init_stream_info (&out_streaminfo); y4m_init_frame_info (&frameinfo); + init_pipes(m_procs); // *************************************************************** // Get video stream informations (size, framerate, interlacing, sample aspect ratio). 
@@ -1866,10 +1952,14 @@ main (int argc, char *argv[]) blackout (input_y, input_u, input_v); frame_num++; - // Output Frame Header - if (y4m_write_frame_header (output_fd, &out_streaminfo, &frameinfo) != Y4M_OK) - goto out_error; + if ((child_pid = fork()) == -1) + mjpeg_error_exit1("fork error\n"); + procs++; + if (!child_pid) + goto parent; + + /* NOTE(review): fork() returns 0 in the new process, so it is the new process that jumps to parent: and carries on the frame loop; the code from here to "return 0" runs in the process that called fork() - confirm this role swap is intended */ // *************** // SCALE THE FRAME @@ -1964,6 +2054,13 @@ main (int argc, char *argv[]) // END OF SCALE THE FRAME // ********************** + /* This process waits till it is asked to write */ + wait_on(m2c[read_proc][0]); + + // Output Frame Header + if (y4m_write_frame_header (output_fd, &out_streaminfo, &frameinfo) != Y4M_OK) + goto out_error; + // OUTPUT FRAME CONTENTS if (skip == 0) { @@ -2016,9 +2113,38 @@ main (int argc, char *argv[]) } } } - } + /* Wrote frame, wake up the process running the frame loop to say this slot is done */ + wakeup_with(c2m[read_proc][1]); + return 0; + /* The scaling process (the one that called fork() for this frame) ends here */ + +parent: + /* Only start writing output once all processes have been given + * work, and ensure they write back in the correct order + */ + if (procs == m_procs) { + /* Wake up the oldest outstanding scaling process (slot write_proc) to tell it it's ok to write its frame */ + wakeup_with(m2c[write_proc][1]); + /* Wait for that process to tell us it has finished writing its frame */ + wait_on(c2m[write_proc][0]); + procs--; + if (++write_proc == m_procs) + write_proc = 0; + } + if (++read_proc == m_procs) + read_proc = 0; + } // End of master loop => no more frame in stdin + /* Clean up any remaining writes from processes waiting around */ + while (procs > 0) { + wakeup_with(m2c[write_proc][1]); + wait_on(c2m[write_proc][0]); + procs--; + if (++write_proc == m_procs) + write_proc = 0; + } + if (err != Y4M_ERR_EOF) mjpeg_error_exit1 ("Couldn't read frame number %ld!", frame_num); else