ProFound: Testing on Simulated Images

Aaron Robotham

2020-12-03

Get the latest version of ProFound and ProFit:

library(devtools)
install_github('asgr/ProFound')
install_github('ICRAR/ProFit')

Set global evaluate (basically TRUE for GitHub version and FALSE for CRAN):

evalglobal=FALSE

First of all we load ProFit and ProFound. We also need to use LaplacesDemon because it gives us the Pareto (power-law) distribution that we will use to realistically sample our magnitude ranges later.

library(ProFound)
library(ProFit)
library(LaplacesDemon)

Making Simulated Data

Next we generate a random image with 200 stars and 200 extended sources. The value used roughly correspoond to the source densities and magnitude distributions you might expect to find in a Z-band VIKING frame (this was used to derive the image statistics).

set.seed(666)

ExamplePSF=profitMakeGaussianPSF(fwhm=5)
ExamplePSF=ExamplePSF/sum(ExamplePSF)

Ngal=200
Nstar=200

model_test=list(
    sersic=list(
        xcen=runif(Ngal,0,1000),
        ycen=runif(Ngal,0,1000),
        mag=24-rpareto(Ngal,2),
        re=rpois(Ngal,5)+runif(Ngal),
        nser=runif(Ngal,1,4),
        ang=runif(Ngal,0,180),
        axrat=runif(Ngal,0.3,1),
        box=runif(Ngal,-0.3,0.3)
    ),
    pointsource=list(
        xcen=runif(Nstar,0,1000),
        ycen=runif(Nstar,0,1000),
        mag=24-rpareto(Nstar,1.5)
    )
)

model_test$sersic$mag[model_test$sersic$mag<15]=runif(length(which(model_test$sersic$mag<15)),15,22)
model_test$pointsource$mag[model_test$pointsource$mag<15]=runif(length(which(model_test$pointsource$mag<15)),15,22)

im_test<-profitMakeModel(modellist=model_test, psf=ExamplePSF, dim=c(1000,1000), magzero = 30)$z

We can take a peak at the raw model before adding on noise and sky etc:

magimage(im_test)

Next we add typical VIKING object shot-noise and sky noise:

im_test=im_test+rnorm(1e6,sd=sqrt(im_test))
im_test=im_test+rnorm(1e6,sd=10)

And we now add a random slightly complex sky:

set.seed(666)

model_sky=list(
    pointsource=list(
        xcen=runif(500,0,1000),
        ycen=runif(500,0,1000),
        mag=20-rpareto(500,1.5)
    )
)

im_sky<-profitMakeModel(modellist=model_sky, psf=ExamplePSF, dim=c(1000,1000), magzero = 30)$z
im_sky=im_sky+rnorm(1e6,2,10)
grid_sky=profoundMakeSkyGrid(im_sky, type='bicubic')$sky

im_test=im_test+grid_sky

Let’s have a look:

magimage(im_test)

magimage(im_test)
points(model_test$sersic$xcen, model_test$sersic$ycen, col='yellow')
points(model_test$pointsource$xcen, model_test$pointsource$ycen, col='green')

Deep Extraction With ProFound

Now we run ProFound with fairly standard settings (note we set the VIRCAM pixel scale):

magimage(im_test)

pro_test=profoundProFound(im_test, magzero=30, verbose=TRUE, plot=TRUE, boundstats=TRUE, pixscale=0.34, tolerance=2)
points(model_test$sersic$xcen, model_test$sersic$ycen, col='yellow')
points(model_test$pointsource$xcen, model_test$pointsource$ycen, col='green')

We can match back to our initial catalogue using coordmatch in celestial

testmatch_gals=coordmatch(cbind(model_test$sersic$xcen,model_test$sersic$ycen)/3600, pro_test$segstats[,c("xcen","ycen")]/3600,5)

testmatch_stars=coordmatch(cbind(model_test$pointsource$xcen,model_test$pointsource$ycen)/3600,pro_test$segstats[,c("xcen","ycen")]/3600,5)

And plot the difference in estimated magnitude for the point sources (red) and extended sources (blue):

magplot(model_test$sersic$mag[testmatch_gals$bestmatch$refID], model_test$sersic$mag[testmatch_gals$bestmatch$refID]-pro_test$segstats[testmatch_gals$bestmatch$compareID,'mag'], grid=TRUE, ylim=c(-1,1), col='blue')
points(pro_test$segstats$mag, pro_test$segstats$mag_err, pch='.')
points(pro_test$segstats$mag, -pro_test$segstats$mag_err, pch='.')

points(model_test$pointsource$mag[testmatch_stars$bestmatch$refID], model_test$pointsource$mag[testmatch_stars$bestmatch$refID]-pro_test$segstats[testmatch_stars$bestmatch$compareID,'mag'],col='red')

We can check how good our sky estimates were too:

magplot(density(pro_test$sky))
lines(density(grid_sky), col='red')

skyRMSerror=sd(pro_test$skyRMS)
maghist(pro_test$skyRMS, breaks=100)
abline(v=c(10-skyRMSerror,10,10+skyRMSerror), lty=c(3,2,3), col='red')

You can see there is a slight bias to a positive sky measurement- this is caused by the background of very faint currently undetected sources

Next we can make a new image where the detected objects are replaced with noise representing out estimated measurements:

im_test_sub=im_test
im_test_sub[pro_test$objects_redo==1]=rnorm(length(which(pro_test$objects_redo==1)), mean=pro_test$sky[pro_test$objects_redo==1], sd=pro_test$skyRMS[pro_test$objects_redo==1])

And we can plot this:

magimage(pro_test$objects_redo)
points(model_test$sersic$xcen, model_test$sersic$ycen, col='yellow')
points(model_test$pointsource$xcen, model_test$pointsource$ycen, col='green')

magimage(im_test_sub)

magimage(im_test_sub)
points(model_test$sersic$xcen, model_test$sersic$ycen, col='yellow')
points(model_test$pointsource$xcen, model_test$pointsource$ycen, col='green')

magimage(profoundImBlur(im_test_sub,5))

magimage(profoundImBlur(im_test_sub,5))
points(model_test$sersic$xcen, model_test$sersic$ycen, col='yellow')
points(model_test$pointsource$xcen, model_test$pointsource$ycen, col='green')

The next step is to run ProFound again but pushing down to detect fainter objects. We use the sky and sky-RMS from the first run, and then mask out region that have known high surface brightness objects.

The important parameter now is the skycut. somewhere between 0.1 and 0.2 recovers the very faint objecst we have left- too low and we see a lot of false-positive contamination, too high and our true-positive rate drops. Setting it to 0.15 appears to be about the sweet spot (this would need testing on different depth data with realistic source distributions etc).

magimage(im_test)

pro_test_sub=profoundProFound(image=im_test_sub, mask=pro_test$objects, sky=pro_test$sky, skyRMS=pro_test$skyRMS, magzero=30, skycut=0.15, pixcut=3, verbose=TRUE, plot=TRUE, boundstats=TRUE, pixscale=0.34, tolerance=2, sigma=5)
points(model_test$sersic$xcen, model_test$sersic$ycen, col='yellow',cex=0.5)
points(model_test$pointsource$xcen, model_test$pointsource$ycen, col='green',cex=0.5)

It is worth experimenting with the above, but it should be clear that with a few steps of detection of masking, very low surface brightness obects can be extracted with some confidence. To fully parameterise these faint objects a proper galaxy profile should be made using ProFit.

Now in practice for very faint extraction you would never use objects on the image edges since you will never have an ideal estimate of the sky. We can cut our catalogue down for this effect:

faintcat=pro_test_sub$segstats[pro_test_sub$segstats$Nborder==0,]

Now we can add our two catalogues (our bright pass and faint pass) back together and match back. Ntoe that the segIDs will be repeated (so be careful using these when catalogues have been combined), but the unique IDs will be unique.

finalcat=rbind(pro_test$segstats, faintcat)
dim(finalcat)

The final catalogue is 402 rows, which is encouraging since we created 400 objects.

We can match this fainter run back against the original catalogue:

testmatch_gals_fin=coordmatch(cbind(model_test$sersic$xcen,model_test$sersic$ycen)/3600,  finalcat[,c("xcen","ycen")]/3600,5)

testmatch_stars_fin=coordmatch(cbind(model_test$pointsource$xcen,model_test$pointsource$ycen)/3600, finalcat[,c("xcen","ycen")]/3600,5)

So some final stats. False-positive=FP (this is bad) and true-positive=TP (this is good).

Galaxy TP = length(testmatch_gals_fin\(bestmatch[,1])/Ngal Stars TP = length(testmatch_stars_fin\)bestmatch[,1])/Nstar Total TP = (length(testmatch_gals_fin\(bestmatch[,1])+length(testmatch_stars_fin\)bestmatch[,1]))/(Ngal+Nstar)

Total FP = (dim(finalcat)[1]-length(testmatch_gals_fin\(bestmatch[,1])-length(testmatch_stars_fin\)bestmatch[,1]))/(Ngal+Nstar)

So we recover 73% of true galaxies, 97% of true stars (i.e. 85% of all true sources) and suffer 17% false positives. This suggests we cannot push much harder on the detection side since most of the new objects will be FALSE (since TP ~ 1-FP, and it will only get harder from here).

We can make a final image with all structure removed, sky subtracted and divided through by the RMS.

im_test_fin=im_test_sub-pro_test_sub$sky
im_test_fin[pro_test_sub$objects_redo==1]=rnorm(length(which(pro_test_sub$objects_redo==1)), mean=0, sd=pro_test_sub$skyRMS[pro_test_sub$objects_redo==1])
im_test_fin=im_test_fin/pro_test_sub$skyRMS

To see whether we have removed (or subtracted out in the case of the sky) all structure we can look at pixel-to-pixel correlation and the 2D FFT (both returned as part of the profoundPixelCorrelation function):

magimage(profoundPixelCorrelation(im_test, skyRMS=10, plot=TRUE)$fft, xlab='kx (2pi/1000pix)', ylab='ky (2pi/1000pix)'); points(0,0,cex=5,col='red')

magimage(profoundPixelCorrelation(im_test_sub, skyRMS=10, plot=TRUE)$fft, xlab='kx (2pi/1000pix)', ylab='ky (2pi/1000pix)'); points(0,0,cex=5,col='red')

magimage(profoundPixelCorrelation(im_test_fin, skyRMS=1, plot=TRUE)$fft, xlab='kx (2pi/1000pix)', ylab='ky (1000/pix)'); points(0,0,cex=5,col='red')

magimage(profoundPixelCorrelation(matrix(rnorm(1e6, mean=0, sd=10), 1000), skyRMS=10, plot=TRUE)$fft, xlab='kx (2pi/1000pix)', ylab='ky (2pi/1000pix)'); points(0,0,cex=5,col='red')

The 2D FFT is complex to interpret, and profoundPixelCorrelation only returns the power component in the so-called ‘optical representation’, where low k modes are in the centre of the image, and high k modes are in the corners. Slightly counter-intuitively we see power excess in the centre (so low k modes). This is a typical feature of images where the structure is smooth without sharp edges, and in the case of astronomy images will tend to mean we have Gaussian-like structures present in the sky since the Fourier transform of a Guassian is a Gaussian. Since undetected sources will tend to be PSF-like in profile, this will create a visible excess in low k signal in the FFT.

It should be clear that doing a few iterative steps of removing sources and analysing the FFT until there is little low to moderate k-mode signal left is a reasonable route to blindly extracting sources.

In this case we were pushing the data to extremely low surface brightness. We can compare our segmented surface brightness levels to the actual image.

magplot(density(finalcat$SB_N100, na.rm=TRUE), xlab='Surface Brightness / mag/asec^-2', ylab='PDF')
abline(v=mean(pro_test$SBlim), col='red')

The performance of ProFound is obviously substantially better when you are nearer to the nominal surface limit of the image.

Despite running this demo in two phases above, it is in fact hard to do better than a deeper single pass (skycut=1 to skycut=0.9 is the only change here):

pro_test_2=profoundProFound(im_test, magzero=30, skycut=0.9, pixcut=3, verbose=TRUE, plot=TRUE, boundstats=TRUE, pixscale=0.34, tolerance=2)

Match back to the intrinsic catalogue as before:

testmatch_gals_2=coordmatch(cbind(model_test$sersic$xcen,model_test$sersic$ycen)/3600, pro_test_2$segstats[,c("xcen","ycen")]/3600,5)

testmatch_stars_2=coordmatch(cbind(model_test$pointsource$xcen,model_test$pointsource$ycen)/3600,pro_test_2$segstats[,c("xcen","ycen")]/3600,5)

Galaxy TP = length(testmatch_gals_2\(bestmatch[,1])/Ngal Stars TP = length(testmatch_stars_2\)bestmatch[,1])/Nstar Total TP = (length(testmatch_gals_2\(bestmatch[,1])+length(testmatch_stars_2\)bestmatch[,1]))/(Ngal+Nstar)

Total FP = (dim(pro_test_2\(segstats)[1]-length(testmatch_gals_2\)bestmatch[,1])-length(testmatch_stars_2$bestmatch[,1]))/(Ngal+Nstar)

Running it like this produces similar True-Positive rates, but a lower global False-Positive rate. The moral? If you know what you doing you might want to jump straight to a deeper extraction, but if you are uncertain then you can proceed in a few iterations, i.e. extract the bright sources you are confident about and then go back later to dig around in the noise to get some more.

Residual Correlation Structure

The pixel correlation plots give you some idea of whether there are more sources to be extracted from the data.

First we will re-run ProFound with a few different skycut levels:

pro_test_sc1=profoundProFound(im_test, magzero=30, skycut=1, plot=FALSE, boundstats=TRUE, pixscale=0.34, tolerance=2)
pro_test_sc2=profoundProFound(im_test, magzero=30, skycut=2, plot=FALSE, boundstats=TRUE, pixscale=0.34, tolerance=2)
pro_test_sc4=profoundProFound(im_test, magzero=30, skycut=4, plot=FALSE, boundstats=TRUE, pixscale=0.34, tolerance=2)

Next we can re make the correlation plots using the

profoundPixelCorrelation(im_test, objects = pro_test_sc1$objects_redo, sky=pro_test_sc1$sky, skyRMS=pro_test_sc1$skyRMS, plot=TRUE, ylim=c(-0.1,0.1))
profoundPixelCorrelation(im_test, objects = pro_test_sc2$objects_redo, sky=pro_test_sc2$sky, skyRMS=pro_test_sc2$skyRMS, plot=TRUE, ylim=c(-0.1,0.1))
profoundPixelCorrelation(im_test, objects = pro_test_sc4$objects_redo, sky=pro_test_sc4$sky, skyRMS=pro_test_sc4$skyRMS, plot=TRUE, ylim=c(-0.1,0.1))

It is worth describing what the above tells us. The solid lines show correlation between all non-masked non-object pixels at different scales. In general an excess correlation means we are on average seeing features at a certain scale. The solid line is actually sensitive to both negative and positive features, where the former could come from a biased sky-subtraction.

To help separate the two the profoundPixelCorrelation also returns the difference between the correlation of positive pixels (after sky subtraction) and negative pixels with the same lags (the dashed lines). If the solid line shows an excess and the dashed line is lower (or even on zero) then this is a good clue that the difference is due to a funky sky subtraction or even intrumental correlation structure (particularly true in the NIR), i.e. there is as much positive as negative correlation structure. This means we are probably not missing a significant number of real sources (which are only ever positive). If the dashed line sites above the solid line then it is a sure sign that there are more sources lurking since we see more correlation for positive than negative pixels.

It might be that in a real image the dashed lines can never be brought all the way down to zero, i.e. there are very faint sub-noise objects that are clustered on a typical scale that we can see in these plots, but have no hope of extracting from the image.

Making a Better Sky

The profoundSkySplitFFT allows us to make a better sky image by extracting residual structure not captured well by out bilinear/bicubic sky maps that use local box cars to estimate the sky.

The basic idea is you provide the original image, your current best effort sky and sky-RMS maps, and your object mask. The function makes an FFT or the image-sky where object pixels are replaced with Normal samples using the appropriate sky and sky-RMS values. If this is done well already then the image created will look like pure noise.

The FFT generated above is then clipped at a user defined scale which separates out low k-mode structure (possible sky) from higher k-mode structure (possible real objects). We can see how much difference this might make using a smoother bicubic sky:

pro_bicubic=profoundProFound(im_test, type='bicubic')
newsky=profoundSkySplitFFT(im_test, objects=pro_bicubic$objects_redo, sky=pro_bicubic$sky, skyRMS=pro_bicubic$skyRMS, skyscale=200)

We can see how this compares with the intrinsic and older sky:

magimage(grid_sky)
magimage(pro_bicubic$sky)
magimage(newsky$sky)

Not a huge visual difference, but we can see we now have smaller residuals with respect to the intrinsic sky:

magplot(density(newsky$sky-grid_sky, bw=0.1), col='red', grid=TRUE)
lines(density(pro_bicubic$sky-grid_sky, bw=0.1), col='blue')
legend('topright', legend=c('New - Intrinsic Sky', 'Old - Intrinsic Sky'), lty=1, col=c('red','blue'))

We can also look at the new high k-mode image to look for sources (effectively im_test-newsky$sky):

magimage(newsky$sky_hi)