;
; ; SOURCE: spider/docs/techs/recon1/Procs/verify-class-byview.spi ; ; PURPOSE: Classify particles assigned to each reference view ; ; USAGE: clean ; ./spider spi/dat @verify-class-byview ; OPTIONAL: clean ; ./spider spi/dat @verify-class-byview RESULTS=4 FIRST=63 LAST=83 ; ; REQUIRES: spider/docs/techs/recon1/Procs/verify-settings.spi ; ; INPUTS: (Where *** denotes group, ### denotes view, ??? denotes class) ; [params] ../params Parameter document (one) ; [ref_view_list] [rec_dir]/sel_proj List of projection views (one) ; [view_list] [view_dir]/prj###/sel_part_byv Particle selection by view doc files (one/view) ; [filtered_stack] [view_dir]/prj###/stkfilt Filtered particle image stacks (one/view) ; ; If using defocus groups: ; [df_group_list] ../Power_Spectra/order_defgrps Defocus group list (one) ; [ref_proj] [in_dir]/ref_proj_*** Reference-projection prefix (one/defocus-group) ; [ref_proj_nodef] [in_dir]/ref_proj_01 Reference-projection prefix (only one) ; ; OUTPUTS: ; [class_doc] [view_dir]/prj###/sel_part_byclass_??? Particle selection by class (one/class/view) ; [class_avg] [view_dir]/prj###/classavg??? Class-average files (one/class/view) ; [class_var] [view_dir]/prj###/classvar??? OPTIONAL variance file for each class (one/class/view) ; [eigen_img] [view_dir]/prj###/eigenimg??? Eigenimage template (one/view) ; [cas_prefix] [view_dir]/prj###/verify CA S output file prefix (several files) ; [eigenvalue_doc] [view_dir]/prj###/listeigenvalues List of eigenvalues (one/view) ; [class_stats_doc] [view_dir]/prj###/listclasses CCC for each class-average (one/view) ; [summary_doc] summary_classify Summary doc file (one) ; [class_log] log.classify Log file for screen output (one) ; ; ------------------- Parameters ------------------- [firstView] = 1 ; First reference-view [lastView] = -1 ; Last reference-view (<1 == all) [ca-pca] = 3 ; Correspondence analysis == 1, PCA == 2 , IPCA == 3 [parts-per-class] = 75 ; Particle-to-class ratio [min-2classes] = 40 ; Minimum number of particles for 2 classes [num-factors] = 9 ; Number of eigenvalues to use [save-eigenimg] = 0 ; Flag to save eigenimages (1 == save) (reconstituted images commented out) [label-dim] = 115 ; Window size (temporary) for labeling ; (Class number and size unlikely to fit in label after size-reduction.) ; Suggestions: 115 for 3-digit class# + 3-digit class-size, 128 for 3+4 or 4+3 [user-constant] = 0 ; Additive constant for correspondence analysis (0 == automatic) ; (CorAn only works on non-negative pixel values.) ; If unsure, check the range of a few particles or the noise-reference for positivity [save-variance] = 0 ; Flag to save variance (1 == save) ; --------------- END BATCH HEADER --------------- MD TR OFF ; Decrease results file output MD VB OFF ; Decrease results file output MD SET MP 1 ; Set common filenames & parameters @verify-settings [iter] = 0 ; Temporary filenames [temp_class_map] = 'tmpclassmap' ; Temp list of particles by assigned class [temp_mask] = '_6' ; Mask [temp_ref_proj] = '_5' ; Reduced reference-projection [temp_rec_pos] = '_91' ; Positive reconstituted image [temp_rec_neg] = '_93' ; Negative reconstituted image [temp_class_avg] = '_4' ; Class average [temp_class_var] = '_7' ; Class variance [temp_exp_avg] = '_2' ; Expanded average (to dimension LABEL-DIM) [temp_labeled_avg] = '_3' ; Expanded, labeled average MY FL ; Check if defocus-group list exists IQ FI [dfgrps-exist] [df_group_list] ; If using defocus groups, then IF ( [dfgrps-exist] == 1 ) THEN ; Get # of defocus-groups UD N [num-grps] [df_group_list] ; Get last defocus-group # UD [num-grps], [last-grp] [df_group_list] [ref_proj] = '[ref_proj_dfgrp][last-grp]' ; UNTESTED ELSE [ref_proj] = [ref_proj_nodef] ; Reference-projection prefix ENDIF ; Check for (optional) command-line ranges IF ([FIRST] .NE. 0) [firstView] = [FIRST] IF ([LAST] .NE. 0) [lastView] = [LAST] IF ( [lastView] < 1 ) THEN ; Get number of views FI X [lastView] [ref_proj] MAXIM ; UD N [lastView] ; [ref_view_list] ; ; (An 84th projection is listed in refangles.) ENDIF ; Diagnostic for results file [firstView] ; first view [lastView] ; last view [temp_summary] = 'tmpsummary_{%i0%[lastView]}' ; Temporary summary doc SYS echo " Will duplicate screen output to [class_log] for remote monitoring" ; echo SYS date SYS date >> [class_log] SYS echo " Using particles in [view_list].$DATEXT" SYS echo " Using particles in [view_list].$DATEXT" >> [class_log] ; GET PARTICLE DIMENSION ; get first view + particle# [one] = 1 UD IC [one], [view] [ref_view_list] UD IC [one], [firstPart] [view_list][view] ; get size of first image FI H [idimInput] [filtered_stack][view]@{%i0%[firstPart]} ; WAS [particle_imgs][firstPart] NX [label-ydim] = ([label-dim]+36)*[idimInput]/[label-dim] ; Label is 36 pixels tall ; Calculate mask radius [mask-radius] = int([idimInput]/2) ; Make circular mask MO [temp_mask] [idimInput],[idimInput] C ; _C_ircle [mask-radius] ; CHECK IF REFERENCES HAVE SAME DIMENSION AS EXPERIMENTAL IMAGES SYS echo " Using projections from: [ref_proj] for CCC" SYS echo " Using projections from: [ref_proj] for CCC" >> [class_log] ; Get image-dimension from first reference [view] = 1 FI H [idimRefs] [ref_proj]@{*[view]} NX ; Compare image-dimensions from params and header, check if downsampled IF ([idimRefs] .NE. [idimInput]) THEN ; compute ratio [ref-ratio] = [idimRefs]/[idimInput] ; If ratio is an integer, images are probably shrunk IF ([ref-ratio] .EQ. int([ref-ratio])) THEN SYS echo "Will (internally) shrink references by factor of {%i0%[ref-ratio]} (={%i0%[idimRefs]}/{%i0%[idimInput]})" SYS echo "Will (internally) shrink references by factor of {%i0%[ref-ratio]} (={%i0%[idimRefs]}/{%i0%[idimInput]})" >> [class_log] [shrinkrefs-yn] = 1 ; If ratio is not an integer ELSE ; don't know what to do SYS echo "ERROR:" SYS echo "Image dimension from particle [particle_imgs][firstPart], {***[idimInput]}, different from [ref_proj], {***[idimRefs]}" SYS echo "Fix and re-start" EN ENDIF ELSE [ref-ratio] = 1 ENDIF ; Initialize particle-counter [total-particles] = 0 ; Initialize SYS touch [summary_doc].$DATEXT ; Clean up pre-existing file DE [temp_summary] ; WAS [summary_doc] ; Label summary file SD / VIEWNUM NUM_PARTS MAX_CLASSSIZE NUM_CLASSES DAVIESBOULDIN [temp_summary] ; WAS [summary_doc] SYS echo -n " Beginning classification" SYS echo -n " Beginning classification" >> [class_log] IF ( [save-eigenimg]==1 ) THEN SYS echo " Will generate eigenimages" SYS echo " Will generate eigenimages" >> [class_log] ENDIF ; Loop through reference views DO [view-key] = [firstView],[lastView] ; Loop through ref views --------------- ; Get reference-view # UD IC [view-key], [view] [ref_view_list] DE [class_stats_doc][view]_unsort IQ FI [exists] [view_list][view] IF ( [exists] == 0 ) THEN [num-classes] = 0 [max-class-size] = 0 GOTO LB19 ENDIF ; Get number of particles in current reference view UD N [view-parts] [view_list][view] ; Initialize registers [max-class-size] = 0 ; Maximum class size [min-img] = 0 ; Image with minimum pixel value ; Skip unpopulated views IF ( [view-parts] == 0 ) THEN [num-classes] = 0 [max-class-size] = 0 GOTO LB19 ENDIF ; Clean up [class] = 1 DE A ; Delete file series [class_doc][view][class] ; First deleted file DE A [class_avg][view][class] DE A [class_var][view][class] DE [eigenvalue_doc][view] ; Shrink reference-projection IP [ref_proj]@{****[view]} ; Normalized reference-projection file (input) [temp_ref_proj] ; Reduced reference-projection file (output) [idimInput],[idimInput] ; WAS [reduction],[reduction] ; Determine number of classes to use [num-classes] = int(([view-parts]/[parts-per-class]) + 0.5) ; Trap for tiny classes IF ( [num-classes] < 2 ) THEN IF ( [view-parts] >= [min-2classes]) THEN ; Force two classes if greater than specified threshold [num-classes] = 2 ELSE [num-classes] = 1 ; Minimum number of classes will be 1 [class] = 1 ; Dummy variable: class # ; Sort particles by cross-correlation coefficient DOC REN [view_list][view] ; Selection doc file (unsorted) (input) [class_doc][view][class]_noccc ; Class doc file, sorted by CCC (output) SYS echo " View: {%I4%[view]} Has: {%I3%[num-classes]} class " SYS echo " View: {%I4%[view]} Has: {%I3%[num-classes]} class " >> [class_log] [class-parts] = [view-parts] GOTO LB3 ; Skip past correspondence analysis + classification ENDIF ENDIF ; If CA selected: IF ( [ca-pca] == 1 ) THEN ; Check for small pixel intensities ; Initialize minimum [overall-min] = 999999 ; Loop through particles DO [part-key5] = 1,[view-parts] ; Loop through particles -------- ; Get particle number UD IC [part-key5],[view-slice] [view_list][view] ; Get image min FS [filtered_stack][view]@******[view-slice] FI H [img-min] [filtered_stack][view]@******[view-slice] FMIN ; Header location for min ; Update if necessary IF ( [img-min] < [overall-min] ) THEN [overall-min] = [img-min] [min-img] = [view-slice] ENDIF ENDDO ; End particle-loop -------------------------- UD ICE ; Close document [view_list][view] ; If additive constant set to automatic, set it IF ( [overall-min] < 0.05 ) THEN IF ( [user-constant] == 0 ) [add-constant] = 0.05 - [overall-min] ELSE [add-constant] = [user-constant] ENDIF SYS echo " View: {%i4%[view]}, Minimum: {%f10.3%[overall-min]} at image: {******[min-img]}, Add constant: {%f11.4%[add-constant]}" SYS echo " View: {%i4%[view]}, Minimum: {%f10.3%[overall-min]} at image: {******[min-img]}, Add constant: {%f11.4%[add-constant]}" >> [class_log] ENDIF MY FL ; RUN MULTIVARIATE STATISICAL ANALYSIS IF ( [ca-pca] == 1 ) THEN ; Run correspondence analysis CA S [filtered_stack][view]@****** [view_list][view] [temp_mask] ; Mask [num-factors] ; Number of eigenvalues C ; Correspondence analysis [add-constant] [cas_prefix][view] ; File prefix (output) ENDIF IF ( [ca-pca] == 2) THEN ; Run iterative principle component analysis CA S [filtered_stack][view]@****** [view_list][view] [temp_mask] ; Mask [num-factors] ; Number of eigenvalues P ; Principle component analysis [cas_prefix][view] ; File prefix (output) ENDIF IF ( [ca-pca] == 3 ) THEN ; Run principle component analysis CA S [filtered_stack][view]@****** [view_list][view] ; Files (input) [temp_mask] ; Mask [num-factors] ; Number of eigenvalues I ; Iterative PCA [cas_prefix][view] ; File prefix (output) ENDIF ; Run K-means classification CL KM [km-bet],[km-win],[km-coleman],[km-harabasz],[km-db] [cas_prefix][view]_IMC ; File (input) [num-classes] ; Number of classes 1-[num-factors] ; Factors to use 0 ; No factor weighting 0 ; No random seed [class_doc][view]_noccc ; Temp class-list doc file template (output) [temp_class_map] ; Unused class membership file (output) IF ( [save-eigenimg] >= 1 ) THEN ; If eigenimage-flag is 1, then save ; GENERATE EIGENIMAGES (OPTIONALLY) ; ; Calculate dimensions for reconstituted images ; [double-idim] = [idimInput]*2 ; Double image-dimension ; [idim-plus1] = [idimInput]+1 ; Image-dimension + 1 ; Loop through eigenvalues DO [factor-num] = 1,[num-factors] ; Loop through eigenvalues ---------- ; If (I)PCA, extra question asked when generating eigenimages IF ( [ca-pca] .NE. 1 ) THEN CA SRE [cas_prefix][view] ; Files (input) N ; Subtract average? [factor-num] ; Factor [eigen_img][view][factor-num] ; File (output) ELSE ; Correspondence analysis CA SRE [cas_prefix][view] ; Files (input) [factor-num] [eigen_img][view][factor-num] ; File (output) ENDIF ; ; Generate positive reconstituted image ; CA SRA ; [cas_prefix][view] ; Files (input) ; [factor-num] ; 0.2 ; Eigenvalue ; [temp_rec_pos] ; File (output) ; ; ; pad image to twice the height ; PD ; [temp_rec_pos] ; Positive reconstituted image (input) ; [recon_img][factor-num] ; [idimInput],[double-idim] ; B ; Set background to Border ; 1,1 ; Top-left coordinates ; ; ; generate negative reconstituted image ; CA SRA ; [cas_prefix][view] ; Files (input) ; [factor-num] ; -0.2 ; Eigenvalue ; [temp_rec_neg] ; File (output) ; ; ; Insert negative reconstituted image into larger image ; IN ; [temp_rec_neg] ; Negative reconstituted image (input) ; [recon_img][factor-num] ; File (overwritten) (input) ; 1,[idim-plus1] ; X-,Y-coords SD [factor-num], [factor-num],[factor-num] [eigenvalue_doc][view] ; File (output) ENDDO ; End eigenvalue-loop ----------------------- SD E [eigenvalue_doc][view] ENDIF LB3 ; Skip to here if only one class ; GENERATE CLASS AVERAGES ; Loop through classes DO [class] = 1,[num-classes] ; ADD PARTICLE#, CCC, ETC. TO CLASS DOC ; Add info from selection doc DOC AND [view_list][view] [class_doc][view][class]_noccc [class_doc][view][class]_unsort 1 ; Column # to intersect: view-slice # ; Sort individual particles by CCC DOC SORT [class_doc][view][class]_unsort ; Temp vile : w/CCC, unsorted (input) [class_doc][view][class]_noccc ; Class doc file , sorted by CCROT (output) 4 ; Column # to sort: CCROT Y ; Renumber ; Get # of particles in class UD N [class-parts] [class_doc][view][class]_noccc ; If size greater than maximum, then update IF ( [class-parts] > [max-class-size]) [max-class-size] = [class-parts] SD / VIEW_WIN GLOBAL_NUM GRP_WIN CCROT MIRROR GRP_NUM VIEW [class_doc][view][class] ; File (output) SD E [class_doc][view][class] ; File (closed) [ccc-sum] = 0 ; Initialize cumulative CCC-sum ; Loop through particles DO [part-key6] = 1,[class-parts] ; Read view-slice#, other parameters, from selection file UD IC [part-key6], [view-slice],[global-part],[grp-slice],[ccrot],[mirror],[group-num],[view] [class_doc][view][class]_noccc ; File (input) ; Calculate CCC CC C [ccc] [temp_ref_proj] ; Reduced reference-projection (input) [filtered_stack][view]@******[view-slice] ; Particle (input) [temp_mask] ; Reduced mask (input) [ccc-sum] = [ccc-sum] + [ccc] ; Write to doc SD [part-key6], [view-slice],[global-part],[ccc],[ccrot],[mirror],[group-num],[grp-slice] [class_doc][view][class] ; File (output) ENDDO ; Clean up UD ICE [class_doc][view][class]_noccc ; File (closed) SD E [class_doc][view][class] ; File (closed) ; Calculate unlabeled average AS R [filtered_stack][view]@****** [class_doc][view][class] ; Class-list doc file (input) A ; All images [temp_class_avg] ; Class average file (output) [temp_class_var] ; Cclass variance file (output) ; LABEL AVERAGES ; Expand to fit text label, if necessary IP [temp_class_avg] ; Class-average file (input) [temp_exp_avg] ; Expanded class-average file (output) [label-dim],[label-dim] ; Get class size UD N [class-size] [class_doc][view][class] [total-particles] = [total-particles] + [class-size] ; Label with class number & size LA B [temp_exp_avg] ; Expanded class-average file (input) [temp_labeled_avg] ; Expanded, labeled class-average file (output) {***[class]},n={***[class-size]} ; Shrink back down IP [temp_labeled_avg] ; Expanded, labeled class-average file (input) [class_avg][view][class] ; File (output) [idimInput],[label-ydim] ; Delete variance if not needed IF ( [save-variance] == 1 ) THEN CP [temp_class_var] ; File (input) [class_var][view][class] ; File (output) ENDIF ; Calculate avg CCC [ccc-avg] = [ccc-sum]/[class-parts] ; Get standard deviation of the variance FS M [var-max],[var-min],[var-avg],[var-sd] [temp_class_var] ; File (input) [temp_mask] ; File reduced mask (input) ; Trap for variance of a single image IF ( [class-size] == 1 ) [var-sd]=999 ; Otherwise, the variance of a single image would be NaN ; Write CCC and variance-SD to doc file SD [class], [class],[ccc-avg],[var-sd] [class_stats_doc][view]_unsort ; File (output) ; Clean up intermediate doc files DE [class_doc][view][class]_noccc ; File (removed) DE [class_doc][view][class]_unsort ; File (removed) ENDDO ; End class-loop ------------------------------- SYS echo " View: {%I4%[view]} Classes: {%I3%[num-classes]} Max class size: {%I3%[max-class-size]}" SYS echo " View: {%I4%[view]} Classes: {%I3%[num-classes]} Max class size: {%I3%[max-class-size]}" >> [class_log] ; Close document SD E [class_stats_doc][view]_unsort ; File (output) DE [class_stats_doc][view] ; File (removed) SD / CLASSNUM CCC VARIANCE_SD [class_stats_doc][view] ; File (closed) SD E [class_stats_doc][view] ; File (closed) ; Sort by CCC DOC SORT A [class_stats_doc][view]_unsort ; File (input) [class_stats_doc][view] ; File (output) 2 ; Column for CCC Y ; Renumber DE [class_stats_doc][view]_unsort ; File (removed) ; Remove class-map doc containing list of particles by assigned class DE [temp_class_map] ; File (removed) SYS M rm -f [cas_prefix][view]_SEQ.$DATEXT ; rm -f [cas_prefix][view]_SET.$DATEXT ; rm -f [cas_prefix][view]_PIX.$DATEXT ; rm -f [cas_prefix][view]_MAS.$DATEXT ; . LB19 ; Skip to here if view unpopulated SD [view-key], [view],[view-parts],[max-class-size],[num-classes],[km-db] [temp_summary] ; WAS [summary_doc] ENDDO ; End reference-view loop -------------------- ; SD / TOTAL_PARTS ; [summary_doc] ; File (output) ; [dummy] = -[lastView] ; SD [dummy], [total-particles] ; [summary_doc] ; File (output) ; Close docs UD ICE [ref_view_list] ; File (closed) SD E [temp_summary] ; WAS [summary_doc] ; File (closed) SYS cat [temp_summary].$DATEXT >> [summary_doc].$DATEXT ; If submitted via qsub, then signal completion IF ([LAST] .NE. 0) THEN SYS \cp -f [temp_summary].$DATEXT [finished_class][LAST].$DATEXT ELSE DE [temp_summary] ENDIF SYS echo " Done -- classified through view {%i0%[lastView]}" SYS echo " Done -- classified through view {%i0%[lastView]}" >> [class_log] SYS date SYS date >> [class_log] EN ; Modified 2016-10-19 ; TO DO: write unsorted class doc in core ; TO DO: label averages in separate loop, while automatically setting LABELDIM ; 2016-06-03 (trs & hn) -- Duplicates STDOUT to log file instead of screen (useful for remote monitoring) ; 2016-03-23 (agl) -- Filenames altered, echo formatting altered ; 2016-02-09 (trs) -- Checks reduction factor automatically ; 2013-11-27 (trs) -- Bug fix with new syntax when view has single class ; 2013-10-23 (agl) -- New file names, modernized syntax & cosmetic ; 2012-03-01 (trs) -- Added #classes and Davies-Bouldin# to summary doc ; 2012-02-29 (trs) -- K-means uses same factors as CA/PCA (had been hard-wired to 9) ; 2009-07-13 (trs) -- Added summary-file output ; 2009-07-03 (trs) -- Prints maximum class size to screen ; 2009-06-22 (trs) -- Extra question added by CA SRE when using (I)PCA ; 2009-06-09 (trs) -- Can operate on non-consecutive views from list ; 2009-05-27 (trs) -- Keeping _EIG file from CA S ; 2009-05-26 (trs) -- Option to save eigenimages and reconstituted images, adapted from ca-pca.spi ; 2009-05-26 (trs) -- Number of eigenvalues now user-defined ; 2009-05-22 (trs) -- Uses selection doc in CA S instead of first N particles ; 2008-11-12 (trs) -- Now a parameter to specify CA, PCA, or IPCA ; 2007-11-27 (trs) -- Calculates averaged CCC instead of CCC of class-average ; 2007-11-27 (trs) -- Can force poorly-populated views into two classes ; 2007-10-11 (trs) -- Last reference-view now user-defined ; 2007-10-01 (trs) -- Defocus-group list now optional ; 2007-09-06 (trs) -- Lnput particles are now in stacks ; 2006-08-29 (trs) -- Gets last defocus-group number automatically ; 2006-07-27 (trs) -- Bug fix in reference-projection file-pattern ; 2006-04-05 (trs) -- Uses highest-defocus group projections for CCC ; 2006-02-06 (trs & pp) -- Updated for changes in projection-matching ; 2005-03-27 (trs) -- No longer needs how_many file ; 2005-01-27 (trs & gra) -- Deals with variances of single-image classes ; 2005-01-24 (trs & js) -- Bug fix in loop that checks for low intensities ; 2004-12-22 (trs) -- Checks for images with low intensities ; 2004-12-22 (trs & jsl) -- Prints standard deviation of the class-variance to stats document ; 2004-12-22 (trs) -- Hirst reference-view is user-defined, in case you need to resume ; 2004-05-11 (trs) -- Handles poorly-populated classes differently ; 2004-05-04 (trs) -- Added option to save/delete variance ; 2004-04-22 (trs) -- Sorts individual images by CCC (worst to best) ; 2004-04-14 (trs) -- Deletes unused 'CA S' output (all except _IMC) ;