Android CameraX
==Introduction==
I already had this working for my Food app, but it is now broken, so I am watching a tutorial and thought I had better take notes.
==Setup Permissions==
Make sure we declare the permissions in the Manifest.
<syntaxhighlight lang="xml">
<uses-feature
    android:name="android.hardware.camera"
    android:required="false" />

<uses-permission android:name="android.permission.CAMERA" />
<uses-permission android:name="android.permission.RECORD_AUDIO" />
</syntaxhighlight>
==Implement Permission Checking==
This is the simplest way to get going.
<syntaxhighlight lang="kotlin">
class MainActivity : ComponentActivity() {
    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)
        if (!hasRequiredPermissions()) {
            requestPermissions(CAMERAX_PERMISSIONS, 0)
        }
        enableEdgeToEdge()
    }

    private fun hasRequiredPermissions(): Boolean {
        return CAMERAX_PERMISSIONS.all {
            ContextCompat.checkSelfPermission(this, it) == PackageManager.PERMISSION_GRANTED
        }
    }

    companion object {
        private val CAMERAX_PERMISSIONS = arrayOf(
            android.Manifest.permission.CAMERA,
            android.Manifest.permission.RECORD_AUDIO
        )
    }

    @Composable
    fun Greeting(name: String, modifier: Modifier = Modifier) {
        Text(
            text = "Hello $name!",
            modifier = modifier
        )
    }
}
</syntaxhighlight>
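As an aside (my addition, not from the video): requestPermissions is deprecated, and the Activity Result API is the modern replacement. A minimal sketch inside MainActivity, reusing the same CAMERAX_PERMISSIONS array:
<syntaxhighlight lang="kotlin">
// Sketch only: the Activity Result API alternative, not what the video uses.
// Registered as a property so the launcher exists before the Activity starts.
private val permissionLauncher = registerForActivityResult(
    ActivityResultContracts.RequestMultiplePermissions()
) { results ->
    if (results.values.all { it }) {
        Log.i("CameraXTut", "All permissions granted")
    }
}

// Then in onCreate:
// permissionLauncher.launch(CAMERAX_PERMISSIONS)
</syntaxhighlight>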
==Compose==
I am loving the new Compose; it takes a lot of the tedium out of it all. This did not exist last time. Basically bye-bye XML. There is a lot of code here, but you just make the bits and add the components inline. The CameraPreview composable replaces the previewer that used to be defined in XML.
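CameraPreview is our own wrapper rather than something CameraX ships, and it is not shown elsewhere on this page, so here is a minimal sketch of what it might look like, wrapping CameraX's PreviewView (androidx.camera.view) in a Compose AndroidView:
<syntaxhighlight lang="kotlin">
@Composable
fun CameraPreview(
    controller: LifecycleCameraController,
    modifier: Modifier = Modifier
) {
    val lifecycleOwner = LocalLifecycleOwner.current
    AndroidView(
        factory = { context ->
            // PreviewView is the classic XML previewer, hosted inside Compose
            PreviewView(context).apply {
                this.controller = controller
                controller.bindToLifecycle(lifecycleOwner)
            }
        },
        modifier = modifier
    )
}
</syntaxhighlight>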
<syntaxhighlight lang="kotlin">
setContent {
    CameraXTutTheme {
        Scaffold {
            val scaffoldState = rememberBottomSheetScaffoldState()
            val controller = remember {
                LifecycleCameraController(applicationContext).apply {
                    setEnabledUseCases(
                        CameraController.IMAGE_CAPTURE or
                        CameraController.VIDEO_CAPTURE
                    )
                }
            }
            BottomSheetScaffold(
                scaffoldState = scaffoldState,
                sheetPeekHeight = 0.dp,
                sheetContent = {
                }
            ) { padding ->
                Box(
                    modifier = Modifier
                        .fillMaxSize()
                        .padding(padding)
                ) {
                    CameraPreview(
                        controller,
                        Modifier.fillMaxSize()
                    )
                    IconButton(
                        onClick = {
                            controller.cameraSelector =
                                if (controller.cameraSelector == CameraSelector.DEFAULT_BACK_CAMERA) {
                                    CameraSelector.DEFAULT_FRONT_CAMERA
                                } else {
                                    CameraSelector.DEFAULT_BACK_CAMERA
                                }
                        },
                        modifier = Modifier.padding(16.dp)
                    ) {
                        Icon(
                            imageVector = Icons.Default.Cameraswitch,
                            contentDescription = "Switch Camera"
                        )
                    }
                    Row(
                        modifier = Modifier
                            .fillMaxWidth()
                            .align(Alignment.BottomCenter)
                            .padding(16.dp),
                        horizontalArrangement = Arrangement.SpaceAround
                    ) {
                        IconButton(
                            onClick = {
                            }
                        ) {
                            Icon(
                                imageVector = Icons.Default.Photo,
                                contentDescription = "Open Gallery"
                            )
                        }
                        IconButton(
                            onClick = {
                            }
                        ) {
                            Icon(
                                imageVector = Icons.Default.PhotoCamera,
                                contentDescription = "Take Photo"
                            )
                        }
                    }
                }
            }
        }
    }
}
</syntaxhighlight>
==Add A Panel (Sheet) to Display Taken Images==
First we create a sheet to display the taken images on; this really is an excuse to document something to remind me what to do.
<syntaxhighlight lang="kotlin">
@Composable
fun PhotoBottomSheetContent(
    bitmaps: List<Bitmap>,
    modifier: Modifier
) {
    if (bitmaps.isEmpty()) {
        Box(
            modifier = modifier
                .padding(16.dp),
            contentAlignment = Alignment.Center
        ) {
            Text("No photos taken yet")
        }
    } else {
        LazyVerticalStaggeredGrid(
            columns = StaggeredGridCells.Fixed(2),
            horizontalArrangement = Arrangement.spacedBy(16.dp),
            verticalItemSpacing = 16.dp,
            contentPadding = PaddingValues(16.dp),
            modifier = modifier
        ) {
            items(bitmaps) { bitmap ->
                Image(
                    bitmap = bitmap.asImageBitmap(),
                    contentDescription = null,
                    modifier = Modifier
                        .clip(RoundedCornerShape(10.dp))
                )
            }
        }
    }
}
</syntaxhighlight>
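To actually show this panel we still need to fill in the empty sheetContent from earlier and make the gallery button expand the sheet. A sketch of how that might look inside setContent, assuming the MainViewModel defined in the next section:
<syntaxhighlight lang="kotlin">
// Sketch: wiring the sheet into the scaffold from earlier.
// MainViewModel (with its bitmaps StateFlow) is defined in the next section.
val viewModel = viewModel<MainViewModel>()
val bitmaps by viewModel.bitmaps.collectAsState()
val scope = rememberCoroutineScope()

BottomSheetScaffold(
    scaffoldState = scaffoldState,
    sheetPeekHeight = 0.dp,
    sheetContent = {
        PhotoBottomSheetContent(
            bitmaps = bitmaps,
            modifier = Modifier.fillMaxWidth()
        )
    }
) { padding ->
    // ...preview and buttons as before; the gallery button becomes:
    IconButton(
        onClick = {
            scope.launch {
                scaffoldState.bottomSheetState.expand()
            }
        }
    ) {
        Icon(
            imageVector = Icons.Default.Photo,
            contentDescription = "Open Gallery"
        )
    }
}
</syntaxhighlight>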
==Taking A Photo==
Glad I watched this course; the Compose stuff looks like it will improve development speed a lot. Next we take the photo, and you can see this is now trivial.
==Make A Model for the View==
We create a view model with a MutableStateFlow to store the collected images.
<syntaxhighlight lang="kotlin">
class MainViewModel : ViewModel() {

    private val _bitmaps = MutableStateFlow<List<Bitmap>>(emptyList())
    val bitmaps = _bitmaps.asStateFlow()

    fun onTakePhoto(bitmap: Bitmap) {
        Log.i("CameraXTut", "Adding a photo")
        _bitmaps.value += bitmap
    }
}
</syntaxhighlight>
==Attach the onTakePhoto to the onPhotoTaken delegate==
When we take a photo, it is added to the model.
<syntaxhighlight lang="kotlin">
IconButton(
    onClick = {
        takePhoto(
            controller = controller,
            onPhotoTaken = viewModel::onTakePhoto
        )
    }
) {
    Icon(
        imageVector = Icons.Default.PhotoCamera,
        contentDescription = "Take Photo"
    )
}
</syntaxhighlight>
==Taking the Photo==
And here is the function which takes the photo.
<syntaxhighlight lang="kotlin">
private fun takePhoto(
    controller: LifecycleCameraController,
    onPhotoTaken: (Bitmap) -> Unit
) {
    controller.takePicture(
        ContextCompat.getMainExecutor(applicationContext),
        object : OnImageCapturedCallback() {
            override fun onCaptureSuccess(image: ImageProxy) {
                onPhotoTaken(image.toBitmap())
                Log.i("CameraXTut", "Taken a picture")
            }

            override fun onError(exception: ImageCaptureException) {
                super.onError(exception)
                Log.e("CameraXTut", "Error capturing image", exception)
            }
        }
    )
}
</syntaxhighlight>
==Rotating and Flipping the Image==
When you take the photo, you need to handle the flipping of the image based on which camera is used.
<syntaxhighlight lang="kotlin">
override fun onCaptureSuccess(image: ImageProxy) {
    // Flip the image if it's from the front camera
    val isFrontCamera = controller.cameraSelector == CameraSelector.DEFAULT_FRONT_CAMERA
    onPhotoTaken(rotateImage(image, isFrontCamera))
    Log.i("CameraXTut", "Taken a picture")
}
</syntaxhighlight>
We create a matrix, set the number of degrees to rotate, and then create a bitmap transformed by this matrix.
<syntaxhighlight lang="kotlin">
private fun rotateImage(image: ImageProxy, flipImage: Boolean): Bitmap {
    val matrix = Matrix().apply {
        postRotate(image.imageInfo.rotationDegrees.toFloat())
        if (flipImage) {
            postScale(-1f, 1f)
        }
    }
    return Bitmap.createBitmap(
        image.toBitmap(),
        0,
        0,
        image.width,
        image.height,
        matrix,
        true
    )
}
</syntaxhighlight>
==Recording a Video==
This could not be easier; it's like they want you to use their product. Create a recording object.
<syntaxhighlight lang="kotlin">
private var recording: Recording? = null
</syntaxhighlight>
Create a button.
<syntaxhighlight lang="kotlin">
IconButton(
    onClick = {
        recordVideo(controller = controller)
    }
) {
    Icon(
        imageVector = Icons.Default.Videocam,
        contentDescription = "Record Video"
    )
}
</syntaxhighlight>
Write the function which checks for permissions and either stops or starts video recording.
<syntaxhighlight lang="kotlin">
@SuppressLint("MissingPermission")
private fun recordVideo(
    controller: LifecycleCameraController,
) {
    if (!hasRequiredPermissions()) {
        requestPermissions(CAMERAX_PERMISSIONS, 0)
        return
    }
    // Check if we're already recording, and stop if we are
    if (recording != null) {
        recording?.stop()
        recording = null
        return
    }
    val outputFile = File(filesDir, "video.mp4")
    recording = controller.startRecording(
        FileOutputOptions.Builder(outputFile).build(),
        AudioConfig.create(true),
        ContextCompat.getMainExecutor(applicationContext)
    ) { event ->
        when (event) {
            is VideoRecordEvent.Finalize -> {
                if (event.hasError()) {
                    recording?.close()
                    recording = null
                    Toast.makeText(
                        applicationContext,
                        "Error recording video",
                        Toast.LENGTH_LONG
                    ).show()
                } else {
                    Toast.makeText(applicationContext, "Video saved", Toast.LENGTH_LONG).show()
                }
            }
        }
    }
}
</syntaxhighlight>
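The above saves the recording into the app-private filesDir. As an assumption on my part (not covered in the video), the controller also has a startRecording overload taking MediaStoreOutputOptions, which would save straight to the gallery; a sketch:
<syntaxhighlight lang="kotlin">
// Sketch: save to the device gallery via MediaStore instead of filesDir.
val contentValues = ContentValues().apply {
    put(MediaStore.Video.Media.DISPLAY_NAME, "video.mp4")
    put(MediaStore.Video.Media.MIME_TYPE, "video/mp4")
}
val mediaStoreOutput = MediaStoreOutputOptions.Builder(
    contentResolver,
    MediaStore.Video.Media.EXTERNAL_CONTENT_URI
)
    .setContentValues(contentValues)
    .build()

recording = controller.startRecording(
    mediaStoreOutput,
    AudioConfig.create(true),
    ContextCompat.getMainExecutor(applicationContext)
) { event ->
    // handle VideoRecordEvent.Finalize as above
}
</syntaxhighlight>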
==Dipped my Toe into AI on Android==
So this is my first foray into AI on Android. I am going to plug an Analyzer into the camera to recognize landmarks using a TensorFlow model. To do this we need to:
*Create a Preview Window
*Enable the Analyzer Use Case
*Get the Model
==Create a Preview Window==
This has been done already above.
==Enable the Analyzer Use Case==
We do this by adding IMAGE_ANALYSIS to the enabled use cases.
<syntaxhighlight lang="kotlin">
val controller = remember {
    LifecycleCameraController(applicationContext).apply {
        setEnabledUseCases(
            CameraController.IMAGE_CAPTURE or
            CameraController.VIDEO_CAPTURE or
            CameraController.IMAGE_ANALYSIS
        )
    }
}
</syntaxhighlight>
==Get the Model==
Download the model from [https://www.kaggle.com/models/google/landmarks Kaggle]. This used to be hosted on TF Hub but is now on Kaggle. The model format was shown in the video, but since the move from TF Hub this information seems to be missing. Put the downloaded file in the app's assets folder; the code below expects it to be named 1.tflite.
==Make a data Class to Hold the Confidence==
This will hold the result from the model.
<syntaxhighlight lang="kotlin">
data class Classification(
    val label: String,
    val confidence: Float
)
</syntaxhighlight>
==Make an Interface for the Classifier==
This is the interface to implement.
<syntaxhighlight lang="kotlin">
interface LandmarkClassifier {
    fun classify(bitmap: Bitmap, rotation: Int): List<Classification>
}
</syntaxhighlight>
==Implement the Classifier==
We need to make sure the image is orientated as the model expects. This was held in the metadata on the original TF Hub page, but I could not find it on Kaggle. In the video we orientated it as shown. We also need to crop the image to the appropriate size, which was also in the metadata I could not find on Kaggle.
<syntaxhighlight lang="kotlin">
class TfliteLandmarkClassifier(
    private val context: Context,
    private val threshold: Float = 0.5f,
    private val maxResults: Int = 1
) : LandmarkClassifier {

    private var classifier: ImageClassifier? = null

    private fun setupClassifier() {
        val baseOptions = BaseOptions.builder()
            .setNumThreads(2)
            .build()
        val options = ImageClassifier.ImageClassifierOptions.builder()
            .setBaseOptions(baseOptions)
            .setMaxResults(maxResults)
            .setScoreThreshold(threshold)
            .build()
        try {
            // Loads the model from the assets folder
            classifier = ImageClassifier.createFromFileAndOptions(
                context,
                "1.tflite",
                options
            )
        } catch (e: IllegalStateException) {
            e.printStackTrace()
        }
    }

    override fun classify(bitmap: Bitmap, rotation: Int): List<Classification> {
        if (classifier == null) {
            setupClassifier()
        }
        val imageProcessor = ImageProcessor.Builder().build()
        val tensorImage = imageProcessor.process(TensorImage.fromBitmap(bitmap))
        val imageProcessingOptions = ImageProcessingOptions.builder()
            .setOrientation(getOrientationFromRotation(rotation))
            .build()
        val results = classifier?.classify(tensorImage, imageProcessingOptions)
        val thisTime = results?.flatMap { classifications ->
            classifications.categories.map { category ->
                Classification(
                    label = category.displayName,
                    confidence = category.score
                )
            }
            // Make a dummy entry if no results
        }?.distinctBy { it.label } ?: listOf(Classification("No results", 10f))
        Log.i("RESULTS", thisTime.first().label)
        return thisTime
    }

    private fun getOrientationFromRotation(rotation: Int): ImageProcessingOptions.Orientation {
        return when (rotation) {
            android.view.Surface.ROTATION_0 -> ImageProcessingOptions.Orientation.RIGHT_TOP
            android.view.Surface.ROTATION_90 -> ImageProcessingOptions.Orientation.TOP_LEFT
            android.view.Surface.ROTATION_180 -> ImageProcessingOptions.Orientation.RIGHT_BOTTOM
            else -> ImageProcessingOptions.Orientation.TOP_LEFT
        }
    }
}
</syntaxhighlight>
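For reference, and this is my addition rather than something from the video: ImageClassifier, BaseOptions and friends above come from the TensorFlow Lite Task Library. A sketch of the module Gradle dependency, with the version being an assumption to check:
<syntaxhighlight lang="kotlin">
// build.gradle.kts (module); the version number is an assumption, check for the latest
dependencies {
    implementation("org.tensorflow:tensorflow-lite-task-vision:0.4.4")
}
</syntaxhighlight>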
==Cropping the Image==
The image for the model needs to be cropped to 321 × 321 pixels. To do this we use the following extension function on Bitmap.
<syntaxhighlight lang="kotlin">
fun Bitmap.centreCrop(desiredWidth: Int, desiredHeight: Int): Bitmap {
    val x = (width - desiredWidth) / 2
    val y = (height - desiredHeight) / 2
    if (x < 0 || y < 0) {
        throw IllegalArgumentException("Desired dimensions are larger than the bitmap")
    }
    return Bitmap.createBitmap(this, x, y, desiredWidth, desiredHeight)
}
</syntaxhighlight>
==Implement the Analyzer==
Now we need to implement the Analyzer. We make sure classification only runs every 60th frame, roughly once a second.
<syntaxhighlight lang="kotlin">
class LandmarkImageAnalyzer(
    private val classifier: LandmarkClassifier,
    private val onResult: (List<Classification>) -> Unit
) : ImageAnalysis.Analyzer {

    private var frameSkipCounter = 0

    override fun analyze(image: ImageProxy) {
        // Only classify every 60th frame
        if (frameSkipCounter % 60 == 0) {
            val rotationDegree = image.imageInfo.rotationDegrees
            val bitmap = image
                .toBitmap()
                .centreCrop(321, 321)
            val classifications = classifier.classify(bitmap, rotationDegree)
            onResult(classifications)
        }
        frameSkipCounter++
        image.close()
    }
}
</syntaxhighlight>
==Display Results==
This is mostly about using Compose. We create the things we need:
<syntaxhighlight lang="kotlin">
var classifications by remember {
    mutableStateOf(emptyList<Classification>())
}
val analyzer = remember {
    LandmarkImageAnalyzer(
        classifier = TfliteLandmarkClassifier(
            context = applicationContext
        ),
        onResult = {
            classifications = it
        }
    )
}
val controller = remember {
    LifecycleCameraController(applicationContext).apply {
        setEnabledUseCases(
            CameraController.IMAGE_CAPTURE or
            CameraController.VIDEO_CAPTURE or
            CameraController.IMAGE_ANALYSIS
        )
        setImageAnalysisAnalyzer(
            ContextCompat.getMainExecutor(applicationContext),
            analyzer
        )
    }
}
</syntaxhighlight>
Now we need to display the results.
<syntaxhighlight lang="kotlin">
Box(
    modifier = Modifier
        .fillMaxSize()
) {
    CameraPreview(controller, Modifier.fillMaxSize())
    Column(
        modifier = Modifier
            .fillMaxWidth()
            .align(Alignment.TopCenter)
    ) {
        classifications.forEach {
            Text(
                text = it.label,
                modifier = Modifier
                    .fillMaxWidth()
                    .background(MaterialTheme.colorScheme.primaryContainer)
                    .padding(8.dp),
                textAlign = TextAlign.Center,
                fontSize = 20.sp,
                color = MaterialTheme.colorScheme.primary
            )
        }
    }
}
</syntaxhighlight>
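The Classification data class also carries the confidence. Not from the video, but if you want the score on screen too, the Text could show it alongside the label:
<syntaxhighlight lang="kotlin">
classifications.forEach {
    Text(
        // Show the score as a percentage alongside the label
        text = "${it.label} ${(it.confidence * 100).toInt()}%",
        modifier = Modifier
            .fillMaxWidth()
            .background(MaterialTheme.colorScheme.primaryContainer)
            .padding(8.dp),
        textAlign = TextAlign.Center,
        fontSize = 20.sp,
        color = MaterialTheme.colorScheme.primary
    )
}
</syntaxhighlight>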