@@ -71,129 +71,250 @@ public enum IntelligenceService {
/// Uses multiple Vision APIs to gather detailed information about the image:
/// - Image classification for scene and object identification
/// - Text recognition for readable content
/// - Face detection and landmarks for portraits
/// - Human and animal detection for subjects
/// - Saliency analysis for key regions of interest
/// - Horizon detection for landscape orientation
/// - Barcode detection for QR codes and barcodes
/// - Document detection for papers and screenshots
///
/// The result is a pretty-printed JSON object with sorted keys. `imageSize` and
/// `orientation` are always present. The remaining keys are only emitted when the
/// corresponding analysis produced something: `sceneClassification`, `faces`,
/// `humans`, `animals`, `regionsOfInterest`, `horizon`, `text`, `barcodes`,
/// `containsDocument` and `documentCount`.
///
/// Positions are derived from the centre of each normalized bounding box, and all
/// confidences are reported as integer percentages.
///
/// - Parameter cgImage: The image to analyze
/// - Returns: A JSON string with structured analysis data
/// - Throws: Any error raised while performing the Vision requests, or an `NSError`
///   in the `IntelligenceService` domain if the analysis cannot be encoded as JSON
@available(iOS 26, *)
public static func analyzeImage(_ cgImage: CGImage) async throws -> String {
    let startTime = CFAbsoluteTimeGetCurrent()

    // Buckets a normalized bounding box into thirds along the x axis, using the
    // box centre so that large boxes are not misreported as "left".
    func horizontalPosition(of box: CGRect) -> String {
        if box.midX < 0.33 { return "left" }
        if box.midX > 0.66 { return "right" }
        return "center"
    }

    // Buckets a normalized bounding box into thirds along the y axis. Small y values
    // map to "bottom", matching Vision's lower-left-origin coordinate space.
    func verticalPosition(of box: CGRect) -> String {
        if box.midY < 0.33 { return "bottom" }
        if box.midY > 0.66 { return "top" }
        return "middle"
    }

    // Single place to build the error reported for any JSON encoding failure.
    func makeEncodingError() -> NSError {
        NSError(domain: "IntelligenceService", code: -1, userInfo: [
            NSLocalizedDescriptionKey: "Failed to convert analysis to JSON"
        ])
    }

    // Create all analysis requests
    let classifyRequest = VNClassifyImageRequest()
    let textRequest = VNRecognizeTextRequest()
    textRequest.recognitionLevel = .accurate
    textRequest.usesLanguageCorrection = true

    // The landmarks request yields face bounding boxes as well, so a separate
    // face-rectangles request would only duplicate work.
    let faceLandmarksRequest = VNDetectFaceLandmarksRequest()
    let humanRequest = VNDetectHumanRectanglesRequest()
    let animalRequest = VNRecognizeAnimalsRequest()
    let saliencyRequest = VNGenerateAttentionBasedSaliencyImageRequest()
    let horizonRequest = VNDetectHorizonRequest()
    let barcodeRequest = VNDetectBarcodesRequest()
    let documentRequest = VNDetectDocumentSegmentationRequest()

    // Perform all requests
    let handler = VNImageRequestHandler(cgImage: cgImage, options: [:])
    try handler.perform([
        classifyRequest,
        textRequest,
        faceLandmarksRequest,
        humanRequest,
        animalRequest,
        saliencyRequest,
        horizonRequest,
        barcodeRequest,
        documentRequest
    ])

    // Build structured analysis result
    var analysis: [String: Any] = [:]

    // Image dimensions
    analysis["imageSize"] = [
        "width": cgImage.width,
        "height": cgImage.height
    ]

    // Only clearly wide or clearly tall images are labelled; everything in between
    // (including 4:3) is reported as "square".
    let aspectRatio = Double(cgImage.width) / Double(cgImage.height)
    if aspectRatio > 1.5 {
        analysis["orientation"] = "landscape"
    } else if aspectRatio < 0.7 {
        analysis["orientation"] = "portrait"
    } else {
        analysis["orientation"] = "square"
    }

    // 1. Scene/Object Classification (top 5 results, then confidence filter)
    if let classifications = classifyRequest.results?.prefix(5) {
        let labels = classifications
            .filter { $0.confidence > 0.3 }
            .map { [
                "label": $0.identifier.replacingOccurrences(of: "_", with: " "),
                "confidence": Int($0.confidence * 100)
            ] as [String: Any] }
        if !labels.isEmpty {
            analysis["sceneClassification"] = labels
        }
    }

    // 2. Face Detection with Landmarks
    if let faceObservations = faceLandmarksRequest.results, !faceObservations.isEmpty {
        let facesData = faceObservations.map { face -> [String: Any] in
            let bounds = face.boundingBox
            var faceInfo: [String: Any] = [
                "horizontalPosition": horizontalPosition(of: bounds),
                "verticalPosition": verticalPosition(of: bounds)
            ]

            // Size (relative to image)
            let faceArea = bounds.width * bounds.height
            if faceArea > 0.25 {
                faceInfo["size"] = "closeup"
            } else if faceArea > 0.1 {
                faceInfo["size"] = "medium"
            } else {
                faceInfo["size"] = "distant"
            }

            // Landmarks details
            if let landmarks = face.landmarks {
                var landmarksInfo: [String] = []
                if landmarks.faceContour != nil { landmarksInfo.append("face contour") }
                if landmarks.leftEye != nil { landmarksInfo.append("left eye") }
                if landmarks.rightEye != nil { landmarksInfo.append("right eye") }
                if landmarks.nose != nil { landmarksInfo.append("nose") }
                if landmarks.outerLips != nil { landmarksInfo.append("mouth") }
                faceInfo["detectedFeatures"] = landmarksInfo
            }

            return faceInfo
        }
        analysis["faces"] = [
            "count": faceObservations.count,
            "details": facesData
        ] as [String: Any]
    }

    // 3. Human Detection (full body)
    if let humanObservations = humanRequest.results, !humanObservations.isEmpty {
        let humanData = humanObservations.map { observation -> [String: Any] in
            let bounds = observation.boundingBox
            return [
                "confidence": Int(observation.confidence * 100),
                "size": bounds.width * bounds.height > 0.2 ? "prominent" : "background"
            ]
        }
        analysis["humans"] = [
            "count": humanObservations.count,
            "details": humanData
        ] as [String: Any]
    }

    // 4. Animals
    if let animalObservations = animalRequest.results, !animalObservations.isEmpty {
        let animals = animalObservations
            .filter { $0.confidence > 0.5 }
            .compactMap { observation -> [String: Any]? in
                guard let label = observation.labels.first else { return nil }
                return [
                    "type": label.identifier,
                    "confidence": Int(label.confidence * 100)
                ]
            }
        if !animals.isEmpty {
            analysis["animals"] = animals
        }
    }

    // 5. Saliency (regions of interest)
    if let salientObjects = saliencyRequest.results?.first?.salientObjects, !salientObjects.isEmpty {
        let regions = salientObjects.map { object -> [String: Any] in
            [
                "position": horizontalPosition(of: object.boundingBox),
                "confidence": Int(object.confidence * 100)
            ]
        }
        analysis["regionsOfInterest"] = [
            "count": salientObjects.count,
            "regions": regions
        ] as [String: Any]
    }

    // 6. Horizon detection
    if let horizon = horizonRequest.results?.first {
        let angle = horizon.angle * 180 / .pi
        // Only report a horizon that is noticeably tilted.
        if abs(angle) > 5 {
            analysis["horizon"] = [
                "angle": Int(angle),
                "tilt": angle > 0 ? "clockwise" : "counterclockwise"
            ] as [String: Any]
        }
    }

    // 7. Text content
    if let textObservations = textRequest.results, !textObservations.isEmpty {
        let textLines = textObservations.compactMap { observation -> [String: Any]? in
            guard let text = observation.topCandidates(1).first?.string else { return nil }
            return [
                "text": text,
                "confidence": Int(observation.confidence * 100)
            ]
        }
        if !textLines.isEmpty {
            let fullText = textLines.compactMap { $0["text"] as? String }.joined(separator: " ")
            analysis["text"] = [
                "fullText": String(fullText.prefix(500)),
                "lineCount": textLines.count,
                // `prefix` yields an ArraySlice, which JSONSerialization rejects with
                // an uncatchable Objective-C exception; materialize a real Array.
                "lines": Array(textLines.prefix(10))
            ] as [String: Any]
        }
    }

    // 8. Barcodes/QR codes
    if let barcodeObservations = barcodeRequest.results, !barcodeObservations.isEmpty {
        let barcodes = barcodeObservations.map { barcode -> [String: Any] in
            var barcodeInfo: [String: Any] = [
                "type": barcode.symbology.rawValue
            ]
            if let payload = barcode.payloadStringValue {
                barcodeInfo["payload"] = payload
            }
            return barcodeInfo
        }
        analysis["barcodes"] = barcodes
    }

    // 9. Document detection
    if let documentObservations = documentRequest.results, !documentObservations.isEmpty {
        analysis["containsDocument"] = true
        analysis["documentCount"] = documentObservations.count
    }

    // Convert to JSON string. Validate first: serializing an invalid object raises
    // an Objective-C exception instead of throwing a Swift error.
    guard JSONSerialization.isValidJSONObject(analysis) else {
        throw makeEncodingError()
    }
    let jsonData = try JSONSerialization.data(withJSONObject: analysis, options: [.prettyPrinted, .sortedKeys])
    guard let jsonString = String(data: jsonData, encoding: .utf8) else {
        throw makeEncodingError()
    }

    WPLogInfo("IntelligenceService.analyzeImage executed in \((CFAbsoluteTimeGetCurrent() - startTime) * 1000) ms")

    return jsonString
}
199320}
0 commit comments