#### Load Libraries ####
library(tesseract)
library(magick)
library(lubridate)
library(tidyverse)
library(exifr)

#### Settings ####
# NOTE: YEAR below is read from fixed character positions of SourceFile, so
# those positions only line up while `source_dir` keeps exactly this value.
source_dir <- "F:\\WMP_MammalImages\\"
dest_dir <- "C:\\Users\\John\\Google Drive\\DOCA\\"
bushnell_dir <- "C:\\Users\\John\\Google Drive\\DOCA\\Bushnell"

#### Step 1: Find, copy, and rename the domestic cat photos ####

# Search folder for all photos of domestic cats
# (files with "DOCA" in their name)
doca_files <- list.files(
  path = source_dir,
  full.names = TRUE,
  recursive = TRUE, # search subfolders as well
  pattern = "DOCA"  # only include files with DOCA in their name
)
if (length(doca_files) == 0) {
  stop("No files matching 'DOCA' found under ", source_dir, call. = FALSE)
}

CAT <- read_exif(
  doca_files,
  tags = c("SourceFile", "Make") # file path and camera make only
)

# Extract relevant data from the previous folder organization
CAT2 <- CAT %>%
  mutate(
    FILE = basename(SourceFile),
    WMP = basename(dirname(SourceFile)), # name of the enclosing folder
    YEAR = stringr::str_sub(SourceFile, start = 22, end = 25),
    NEW_FILE = NA_character_
  )

# Iterate through images
## Copy images to the new folder row by row; the original files are never
## modified, only copied.
### Images are given their new name as they are copied.
for (row in seq_len(nrow(CAT2))) {
  # Display iteration number
  print(row)

  ### Create new name for the copied image: YEAR_WMP_<row number>_DOCA
  CAT2$NEW_FILE[row] <- stringr::str_c(
    CAT2$YEAR[row],
    CAT2$WMP[row],
    row, # add the row number so every name is unique
    "DOCA",
    sep = "_"
  )

  ## Copy straight to the final name (no separate rename step needed)
  copied <- file.copy(
    from = CAT2$SourceFile[row],
    to = paste0(dest_dir, CAT2$NEW_FILE[row], ".jpg"),
    overwrite = TRUE
  )
  if (!copied) {
    warning("Could not copy ", CAT2$SourceFile[row], call. = FALSE)
  }
}

#### Step 2 (manual): sort the copied images by time stamp location ####
# Check files to make sure they all have the time stamp within a similar
# location. Separate images with different time stamp locations
# (i.e. different camera brands).
# For me, this was "Bushnell", "Cuddeback", and "Other".
# I had 315 Bushnell images, 6 Cuddeback, and 6 Other.
# For the rest of this script, I'm focusing on the Bushnell photos.

#### Step 3: Read the time stamp with OCR and rename the Bushnell photos ####

# Now re-search your folder of interest
CAT3 <- read_exif(
  list.files(
    path = bushnell_dir,
    full.names = TRUE,
    recursive = TRUE,
    pattern = "DOCA"
  ),
  tags = c("SourceFile") # only save the file path name
)

# Placeholder columns are typed so the values assigned in the loop below
# match the column type.
CAT4 <- CAT3 %>%
  mutate(
    FILE_NAME = basename(SourceFile),
    WMP = stringr::str_sub(FILE_NAME, start = 6, end = 14),
    DATE = NA_character_,
    YEAR = NA_integer_,
    MONTH = NA_integer_,
    DAY = NA_integer_,
    TIME = NA_character_,
    HOUR = NA_integer_,
    MINUTE = NA_integer_,
    RENAMED_FILE = NA_character_
  )

# Create smaller dataset to practice on, if needed
CAT_TEST <- slice(CAT4, 1:3)

# Iterate through images
for (row in seq_len(nrow(CAT4))) {
  # Display iteration
  print(row)

  # Load the image to process
  image_path <- as.character(CAT4$SourceFile[row])
  CAT_IMAGE <- magick::image_read(image_path)
  print(image_path)

  # Crop the image to the time stamp.
  # Geometry is "<width>x<height>+<x offset>+<y offset>" in pixels.
  CAT_IMAGE2 <- magick::image_crop(CAT_IMAGE, "774x96+2490+2352")

  # Extract Date-Time from the cropped image using OCR.
  # The first recognised word is used as the date, the second as the time.
  DATE_TIME <- tesseract::ocr_data(CAT_IMAGE2)
  date_text <- as.character(DATE_TIME$word[1])
  time_text <- as.character(DATE_TIME$word[2])
  CAT4$DATE[row] <- date_text
  CAT4$TIME[row] <- time_text

  parsed_date <- lubridate::mdy(date_text, quiet = TRUE)
  parsed_time <- lubridate::hms(time_text, quiet = TRUE)

  # If OCR did not yield a readable date and time, leave the file alone:
  # renaming it would produce an "NA_..." name that other failures share.
  if (is.na(parsed_date) || is.na(parsed_time)) {
    warning(
      "Could not read the time stamp from ", image_path,
      "; file left unrenamed.",
      call. = FALSE
    )
    next
  }

  # Extract Date parts
  CAT4$YEAR[row] <- as.integer(lubridate::year(parsed_date))
  CAT4$MONTH[row] <- as.integer(lubridate::month(parsed_date))
  CAT4$DAY[row] <- as.integer(lubridate::day(parsed_date))

  # Extract Time parts
  CAT4$HOUR[row] <- as.integer(lubridate::hour(parsed_time))
  CAT4$MINUTE[row] <- as.integer(lubridate::minute(parsed_time))

  # Build the new file name from the extracted Date-Time
  CAT4$RENAMED_FILE[row] <- stringr::str_c(
    as.character(CAT4$YEAR[row]),
    as.character(CAT4$WMP[row]),
    as.character(CAT4$MONTH[row]),
    as.character(CAT4$DAY[row]),
    as.character(CAT4$HOUR[row]),
    as.character(CAT4$MINUTE[row]),
    "DOCA.jpg",
    sep = "_"
  )

  file.rename(
    from = image_path,
    to = paste0(bushnell_dir, "\\", CAT4$RENAMED_FILE[row])
  )

  cat("image renamed", "\n\n")
}

# Copy the results table to the clipboard (tab-separated) for pasting into a
# spreadsheet; the numeric suffix requests a larger clipboard buffer.
write.table(CAT4, "clipboard-100000", row.names = FALSE, sep = "\t")