Synthetic Data Generation

#1

Are there any examples or tutorials on how to generate synthetic data for mockup / demo purposes?

0 Likes

#2

I’ve seen it done with JavaScript scripts and R scripts - used to generate .csv files with data used in the Application.

0 Likes

#3

an example, in R

###############     DATA GENERATION FOR LIGHTBULB INVENTORY OPTIMIZATION     ###############

# Generates a complete synthetic dataset (fixtures, warehouses, manufacturers,
# smart bulbs, hourly measurements, orders) as CSV files for a demo app.

# NOTE(review): absolute, user-specific path — every write.csv below lands in
# this directory; adjust before running on another machine.
setwd("/Users/jeremyjoslove/Box Sync/jeremy.joslove/Lightbulb Inventory Optimization App/App/Data")
library(reshape)  # provides melt(), used to reshape the measurement tables at the end


# Full-size parameters, kept for reference; the reduced set below is active.
#fixNum <- 5000
#fixPerApartNum <- 20
#aptPerBuildNum <- 10
#buildPerWar <- 10

fixNum <- 300          # total number of light fixtures
fixPerApartNum <- 10   # fixtures per apartment
aptPerBuildNum <- 6    # apartments per building
buildPerWar <- 2       # buildings per warehouse

### Fixture.csv data file

# One row per fixture with its apartment, building and jittered coordinates.
# Integer division groups fixtures fixPerApartNum per apartment and apartments
# aptPerBuildNum per building.
fixture <- seq(1,fixNum,1)
apartments <- (fixture - 1) %/% fixPerApartNum + 1 # fixPerApartNum fixtures per apartment
buildings <- (apartments - 1) %/% aptPerBuildNum + 1 # aptPerBuildNum apartments per building

# Generating latitudes and longitudes in a way that fixtures that are in the same apartment are close to each other:
# each apartment owns a band of the bounding box; fixtures are jittered
# uniformly within half of their apartment's band.
minLat <- 37.47809215
maxLat <- 37.49240941
minLong <- -122.2451554
maxLong <- -122.2289799
N <- length(unique(apartments))
latitude <- minLat + (maxLat - minLat) * apartments / N - (maxLat - minLat) / (2*N)
# runif's first argument is a count; the original passed the vector itself,
# which only worked because runif() silently uses length(n). Make it explicit
# (identical RNG stream, since runif recycles the min/max vectors).
latitude <- runif(length(latitude), latitude - (maxLat - minLat) / (4*N),  latitude + (maxLat - minLat) / (4*N) )
longitude <- minLong + (maxLong - minLong) * apartments / N - (maxLong - minLong) / (2*N)
longitude <- runif(length(longitude), longitude - (maxLong - minLong) / (4*N),  longitude + (maxLong - minLong) / (4*N) )

fixtureFile <- data.frame(fixture = paste0('fixt',fixture), apartment = paste0('apt',apartments), building = paste0('bld',buildings),
                          latitude = latitude, longitude = longitude)

write.csv(fixtureFile, file = "Fixture_reduced.csv",row.names=F)


### BuildingWarehouse.csv file

# Map each building to a warehouse (buildPerWar buildings per warehouse) and
# draw a per-building failure cost uniformly in [90, 110].
unique_buildings <- unique(buildings)
warehouse <- (unique_buildings - 1) %/% buildPerWar + 1
failureCostPerBulb <- runif(length(warehouse), 90, 110)
buildingWarehouseFile <- data.frame(
  building = paste0('bld', unique_buildings),
  warehouse = paste0('warh', warehouse),
  FailureCostPerBulb = failureCostPerBulb
)
write.csv(buildingWarehouseFile, file = "BuildingWarehouse_reduced.csv", row.names = FALSE)


### Warehouse.csv file

# One row per warehouse. NOTE(review): the capacity/WACC/holding-cost vectors
# are hard-coded for exactly 3 warehouses; resize them if the reduced
# parameters above change the warehouse count.
warehouseFile <- data.frame(Warehouse = paste0('warh',unique(warehouse)), maxCapacity = c(1000,1000,500), WACC = c(200, 200, 240),
                             HoldingCostPerBulb = c(15,10,20))
# Warehouse locations drawn uniformly inside the same bounding box as fixtures.
warehouseFile$latitude <- runif(nrow(warehouseFile), minLat, maxLat) 
warehouseFile$longitude <- runif(nrow(warehouseFile), minLong, maxLong)
  
write.csv(warehouseFile, file = "Warehouse_reduced.csv",row.names=F)


### Manufacturer.csv file

# Three bulb manufacturers, each with its minimum order batch size.
manufacturers <- c('Philips', 'Bell', 'GE')
batch_sizes <- c(5, 10, 5)
manufacturerFile <- data.frame(
  Manufacturer = manufacturers,
  orderBatchSize = batch_sizes
)
write.csv(manufacturerFile, file = "Manufacturer_reduced.csv", row.names = FALSE)


### ManufacturerWarehouse.csv file

# merge() with no common columns yields the Cartesian product: one relation
# row per (warehouse, manufacturer) pair, with random standard shipping terms
# and a fixed 1-day expedited lead time.
temp <- merge(warehouseFile,manufacturerFile)
manufacturerWarehouseDF <- data.frame(id = seq(1,nrow(temp),1), Manufacturer = temp$Manufacturer, Warehouse = temp$Warehouse)
manufacturerWarehouseDF$standardShippingTime <- sample(3:5,nrow(manufacturerWarehouseDF), replace=T)
manufacturerWarehouseDF$standardShippingPricePerItem <- sample(4:10,nrow(manufacturerWarehouseDF), replace=T)
manufacturerWarehouseDF$expeditedShippingTime <- rep(1,nrow(manufacturerWarehouseDF))
manufacturerWarehouseDF$expeditedShippingPricePerItem <- sample(20:50,nrow(manufacturerWarehouseDF), replace=T)

rm(temp)



### SmartBulb.csv, SmartBulbMeasurement.csv, SmartBulbFixture.csv, Order.csv, OrderContent.csv, InventoryItem.csv

# Simulation horizon and lookup lists. Measurements are hourly; the inventory
# simulation below steps daily.
beg_date <- ISOdate(2015,1,1,0)
latest_beg_date <- ISOdate(2016,5,1,0)   # latest possible initial install date
end_date <- ISOdate(2018,2,1,0)
hourly_dates <- seq(beg_date, to = end_date, by = "1 hour")
daily_dates <- seq(beg_date, to = end_date, by = "1 day")
manufacturer_list <- c("GE","Philips","Bell")
bulbType_list <- c("LED","CFL","INCAN")
warehouse_list <- c('warh1', 'warh2', 'warh3')
minInvLevel <- floor(fixNum / 50)   # reorder point per inventory item
expLeadTime <- 1                    # expedited order lead time, in days

# Per-bulb failure-model parameter ranges: lifetime hours-on threshold,
# trailing on/off switch-count threshold, trailing wattage std-dev threshold.
# A bulb "dies" when all three metrics exceed its thresholds (see main loop).
death_total_cst1 <- 3000
death_total_cst2 <- 3100
death_trailing_cst1 <- 40
death_trailing_cst2 <- 42
death_sd_cst1 <- 1.08
death_sd_cst2 <- 1.12
  
## Initializing each fixture with a SmartBulb

# One installed bulb per fixture. Start is a random daily date up to
# latest_beg_date; End == 0 marks "still installed" (the whole column is
# converted to POSIXct at the very end of the script).
init_smartBulbFixtureFile <- data.frame(fixture = paste0("fixt",seq(1,fixNum,1)))
init_smartBulbFixtureFile$SN <- paste0("SMBLB",seq(1,fixNum,1))
init_smartBulbFixtureFile$Start <- daily_dates[floor(runif(fixNum, 1, which(daily_dates == latest_beg_date)))]
init_smartBulbFixtureFile$End <- 0                                                          

# Random manufacturer/type per bulb; floor(runif(n, 1, 4)) draws indices 1..3.
init_smartBulbFile <- data.frame(Manufacturer = manufacturer_list[floor(runif(fixNum, 1,4))])
init_smartBulbFile$BulbType <- bulbType_list[floor(runif(fixNum, 1,4))]
init_smartBulbFile$Wattage <- ifelse(init_smartBulbFile$BulbType == 'LED', 9.5, ifelse(init_smartBulbFile$BulbType == 'CFL', 13,60))
init_smartBulbFile$SN <- init_smartBulbFixtureFile$SN
#init_smartBulbFile$manufacturedDate <- init_smartBulbFixtureFile$Start - 60*60*24
init_smartBulbFile$manufacturedDate  <- beg_date

## Creating Merged dataframe that will be useful for me

# Build the master bulb/fixture/warehouse join used throughout the simulation.
# Reads the globals init_smartBulbFile, init_smartBulbFixtureFile, fixtureFile
# and buildingWarehouseFile; returns one row per bulb with its fixture,
# apartment, building and warehouse (coordinates and cost columns dropped).
# Removed a bare `subset(x)` call with no filter/select arguments — a no-op.
join_dataframes <- function(){
  smartBulbFixtureWarehouseJoin <- merge(init_smartBulbFile, init_smartBulbFixtureFile, by = 'SN')
  smartBulbFixtureWarehouseJoin <- merge(smartBulbFixtureWarehouseJoin, fixtureFile, by = 'fixture')
  # Per-fixture coordinates are not needed for the simulation.
  smartBulbFixtureWarehouseJoin <- subset(smartBulbFixtureWarehouseJoin, select = -c(latitude, longitude))
  smartBulbFixtureWarehouseJoin <- merge(smartBulbFixtureWarehouseJoin, buildingWarehouseFile, by = 'building')
  smartBulbFixtureWarehouseJoin <- subset(smartBulbFixtureWarehouseJoin, select = -FailureCostPerBulb)
  return(smartBulbFixtureWarehouseJoin)
}
# Materialize the join and attach per-bulb failure-model parameters:
# death_total (lifetime hours-on), death_trailing (trailing on/off switch
# count) and death_sd (trailing wattage std-dev). All three must be exceeded
# for a bulb to die in the simulation loop.
smartBulbFixtureWarehouseJoin <- join_dataframes()
smartBulbFixtureWarehouseJoin$death_total <- floor(runif(nrow(smartBulbFixtureWarehouseJoin), death_total_cst1,death_total_cst2))
smartBulbFixtureWarehouseJoin$death_trailing <- runif(nrow(smartBulbFixtureWarehouseJoin),death_trailing_cst1,death_trailing_cst2)
smartBulbFixtureWarehouseJoin$death_sd <- runif(nrow(smartBulbFixtureWarehouseJoin),death_sd_cst1,death_sd_cst2)


## Initializing each warehouse with bulbs in stock

# Create one new (in-warehouse) smart bulb plus the matching row for the
# master join table. NOTE(review): the SN is derived from the CURRENT row
# count of the global smartBulbFixtureWarehouseJoin — every generated bulb
# must be rbind-ed back into that data frame before the next call, or serial
# numbers will collide.
generate_lightbulb <- function(manufacturer, bulbtype, warehouse, manufacturedDate){
  bulb <- data.frame(Manufacturer = manufacturer, BulbType = bulbtype) 
  bulb$Wattage <- ifelse(bulb$BulbType == 'LED', 9.5, ifelse(bulb$BulbType == 'CFL', 13,60))
  # Next free serial number: one past the number of bulbs known so far.
  bulb$SN <- paste0('SMBLB',nrow(smartBulbFixtureWarehouseJoin)+1)
  # ISOdate(0,0,0) does not parse and yields NA — used as the "not yet
  # installed" sentinel (the simulation selects warehouse stock via is.na(Start)).
  bulb$StartDate <- ISOdate(0,0,0)
  
  # Join-table row: building/fixture/apartment = 0 because the bulb sits in a
  # warehouse, Start/End are NA until it is installed in a fixture.
  join_array <- data.frame(building = 0, fixture = 0, SN = bulb$SN, Manufacturer = bulb$Manufacturer, BulbType = bulb$BulbType,
                  Wattage = bulb$Wattage, Start = ISOdate(0,0,0), End = ISOdate(0,0,0), manufacturedDate = manufacturedDate, apartment = 0, warehouse = warehouse,
                  death_total = floor(runif(1, death_total_cst1,death_total_cst2)), death_trailing = runif(1,death_trailing_cst1,death_trailing_cst2), 
                  death_sd = runif(1,death_sd_cst1,death_sd_cst2))
  output <- list(Bulb = bulb, Join = join_array)
  return(output)
}

# creating bulbs that are randomly distributed between the 3 warehouses

# Seed the warehouses with spare stock: fixNum/10 extra bulbs with random
# manufacturer, type and warehouse. floor(runif(1, 1, 4)) draws an index in
# 1..3 (sample() would be more idiomatic, but would change the RNG stream).
for (i in 1:floor(fixNum/10)){
  new_bulb <- generate_lightbulb(manufacturer = manufacturer_list[floor(runif(1, 1,4))], bulbtype = bulbType_list[floor(runif(1, 1,4))], 
  warehouse = warehouse_list[floor(runif(1, 1,4))], manufacturedDate = beg_date)
  #init_smartBulbFile <- rbind(init_smartBulbFile, new_bulb$Bulb)
  smartBulbFixtureWarehouseJoin <- rbind(smartBulbFixtureWarehouseJoin, new_bulb$Join)
}

## Defining Inventory Items and Initializing Daily Metrics and generating inventoryItemFile

# One inventory item per (manufacturer, bulb type, warehouse) combination.
tempInv <- expand.grid(Manufacturer = manufacturer_list, BulbType = bulbType_list, Warehouse = warehouse_list)
tempInv$InventoryItemID <- seq(1,nrow(tempInv),1)
tempInv$InitialInventoryLevel <- 0
# Initial stock of each item = count of matching bulbs sitting in the
# warehouse; is.na(Start) marks "never installed" (see generate_lightbulb).
for (manuf in manufacturer_list){
  for (bulbtype in bulbType_list){
    for (warehouse in warehouse_list){
      tempInv[tempInv$Manufacturer == manuf & tempInv$BulbType == bulbtype & tempInv$Warehouse == warehouse,]$InitialInventoryLevel <- 
        nrow(smartBulbFixtureWarehouseJoin[smartBulbFixtureWarehouseJoin$Manufacturer == manuf & 
                                                      smartBulbFixtureWarehouseJoin$BulbType == bulbtype &
                                                      smartBulbFixtureWarehouseJoin$warehouse == warehouse &
                                                      is.na(smartBulbFixtureWarehouseJoin$Start),])
    }
  }
}

# Daily metrics table (one copy per inventory item); all metrics start at 0.
metricsTable <- data.frame(Day = daily_dates)
metricsTable$InventoryLevel <- 0
metricsTable$Demand <- 0
metricsTable$Replacements <- 0
metricsTable$Arrivals <- 0
# Each inventory item is a list: the expand.grid fields, its metrics Table,
# the fixtures it serves, and a queue of dead bulbs awaiting replacement.
inventoryItems <- vector("list",nrow(tempInv))
for (i in 1:nrow(tempInv)){
  inventoryItems[[i]] <- c(tempInv[i,],Table = list(metricsTable))
  inventoryItems[[i]] <- c(inventoryItems[[i]], Fixtures = list(smartBulbFixtureWarehouseJoin[
    smartBulbFixtureWarehouseJoin$warehouse == inventoryItems[[i]]$Warehouse &
      smartBulbFixtureWarehouseJoin$Manufacturer == inventoryItems[[i]]$Manufacturer &
      smartBulbFixtureWarehouseJoin$BulbType == inventoryItems[[i]]$BulbType &
      !is.na(smartBulbFixtureWarehouseJoin$fixture),]$fixture))
  inventoryItems[[i]] <- c(inventoryItems[[i]], bulbsToReplace = list(c()))
}

inventoryItemFile <- tempInv[,c('Warehouse','Manufacturer','BulbType')]
write.csv(inventoryItemFile, file = "InventoryItem_reduced.csv",row.names=F)

rm(tempInv)


## Generating a bunch of different days 

# simulate a bunch of new temperature days that are warmer / colder than average:
# the 24-hour average profile shifted by -20..+20 degrees in 2-degree steps,
# giving a 24 x 21 matrix (one column per candidate day, selected by numeric
# column index later). vapply preallocates the result instead of growing it
# with cbind() inside a loop (the original also initialized with `{}`, i.e. NULL).
avg_temps <- c(52,51,50,49,48,50,52,54,56,58,60,62,65,68,70,70,68,66,62,59,57,55,54,52)
final_temps <- vapply(seq(-20, 20, 2), function(offset) avg_temps + offset,
                      numeric(length(avg_temps)))

# create weekday slots of light status --> 1's mean on and its during mid day time period
# Column 1 is the canonical weekday: lights on from hour 7 through hour 19.
wkdays <- array(0,dim=c(24,100))
for (i in 7:19) {
  wkdays[i,1] <- 1
} 

# randomly create a bunch of other days, changing 1's to 0's 
# (each on-hour is independently switched off with probability 0.3)
for (y in 2:100) {
  for (i in 7:19){
    u <- runif(1)
    if (u < 0.3){
      wkdays[i,y] <- 0
    }  else{
      wkdays[i,y] <- wkdays[i,1]
    }
  }
}

# same process for weekend, lights only on for 4 hours to start
# NOTE(review): 10:14 covers 5 hours (10,11,12,13,14), not 4 — confirm intent.
wkenddays <- array(0,dim=c(24,100))
for (i in 10:14) {
  wkenddays[i,1] <- 1
}

# simulate a bunch of weekend days changing 1s to 0s
for (y in 2:100) {
  for (i in 10:14){
    u <- runif(1)
    if (u < 0.3){
      wkenddays[i,y] <- 0
    }  else{
      wkenddays[i,y] <- wkenddays[i,1]
    }
  }
}

# Steady-state voltage target and the trailing window (hours) used for the
# rolling failure statistics: 24 h * 7 days = one week.
# (Changed `=` to `<-` for top-level assignment, matching the rest of the file.)
voltage_target <- 120
trailingSD <- 24 * 7

# Nominal lumen/wattage targets and the annual lumen decay factor.
lumens_target <- 1000
watt_target <- 15
lumens_decay_factor <- .1

# Initialize data frames where my measurements are going to go
# (one column per bulb SN is appended lazily inside the simulation loop)
all_status <- data.frame(TS = hourly_dates)   # on/off state per hour
all_watts <- data.frame(TS = hourly_dates)    # power draw per hour
all_lumens <- data.frame(TS = hourly_dates)   # light output per hour
all_voltage <- data.frame(TS = hourly_dates)  # supply voltage per hour
all_temps <- data.frame(TS = hourly_dates)    # ambient temperature per hour
all_time_on <- data.frame(placeholder = 0)    # cumulative hours-on per bulb (single row)
all_alive <- data.frame(placeholder = 0)      # per-bulb liveness flag (single row)
# all_trail_watts <- data.frame(placeholder = rep(0,trailingSD)) #determine if this is necessary

# Initialize orders (empty, with the final column types)
orders <- data.frame(orderContentID = integer(0), bulbType = character(0), quantity = integer(0), orderDate = double(0), 
                               manufacturerWarehouseRelation = integer(0), manufacturer = character(0), warehouse = character(0), orderID = integer(0))

print(paste0('Total Number of Days in Simulation: ', length(daily_dates)))

# --- Daily simulation loop ---------------------------------------------------
# For every day and every inventory item: generate hourly measurements for each
# fixture's installed bulb, detect bulb deaths, receive ordered bulbs, replace
# dead bulbs from warehouse stock, and place an expedited order whenever the
# item's inventory drops below minInvLevel. Mutates the all_* measurement
# tables, smartBulbFixtureWarehouseJoin, inventoryItems and orders in place.
# NOTE(review): Table[d+1,], Table[d+expLeadTime,] and hourly_dates[(d+1)*24]
# are indexed without bounds checks; on the final day(s) these reach past the
# end of their containers — confirm this is unreachable for the chosen horizon.
for (d in 1:length(daily_dates)){                     # loop on days
  
  for (i in 1:length(inventoryItems)){                # loop on InventoryItems
    
    thisInventoryItem <- inventoryItems[[i]]
    
    for (fixture in thisInventoryItem$Fixtures){    # loop on fixtures "belonging" to an InventoryItem
        
      # SN of the bulb currently installed in this fixture (End == 0 means
      # "not yet removed").
      currentBulb <- smartBulbFixtureWarehouseJoin[smartBulbFixtureWarehouseJoin$fixture == fixture & 
                                                     smartBulbFixtureWarehouseJoin$End == 0 &
                                                     !is.na(smartBulbFixtureWarehouseJoin$fixture),]$SN
      
      # randomly select consumption behavior for the day: pick one of the 100
      # pre-generated weekend/weekday on-off profiles, or all-zeros if the
      # bulb is not installed yet. d %% 7 < 2 stands in for "weekend".
      if (daily_dates[d] >= smartBulbFixtureWarehouseJoin[smartBulbFixtureWarehouseJoin$SN == currentBulb,]$Start){
        if (d %% 7 < 2) {
          day_int <- floor(runif(1, 1,101)) 
          day_add <- wkenddays[,day_int]
        } else {
          day_int <- floor(runif(1, 1,101)) 
          day_add <- wkdays[,day_int]
        }
      } else {
        day_add <- rep(0,24)
      }
      
      
      # test if the bulb in place is new, in which case we need to generate lots of 0 for the dates before where we are now
      # (append a zero-filled column named after the SN to every table)
      if (!(currentBulb %in% names(all_status))){                                     
        all_status$tempName <- 0
        names(all_status)[names(all_status) == "tempName"] <- currentBulb
        all_time_on$tempName <- 0
        names(all_time_on)[names(all_time_on) == "tempName"] <- currentBulb
        all_watts$tempName <- 0
        names(all_watts)[names(all_watts) == "tempName"] <- currentBulb
        all_lumens$tempName <- 0
        names(all_lumens)[names(all_lumens) == "tempName"] <- currentBulb
        all_voltage$tempName <- 0
        names(all_voltage)[names(all_voltage) == "tempName"] <- currentBulb
        all_temps$tempName <- 0
        names(all_temps)[names(all_temps) == "tempName"] <- currentBulb
        all_alive$tempName <- 0
        names(all_alive)[names(all_alive) == "tempName"] <- currentBulb
        
        #all_status$currentBulb[1:(d-1)*24] <- 0
        #all_time_on$currentBulb <- 0
        #all_watts$currentBulb[1:(d-1)*24] <- 0
        #all_lumens$currentBulb[1:(d-1)*24] <- 0
        #all_voltage$currentBulb[1:(d-1)*24] <- 0
        #all_temps$currentBulb[1:(d-1)*24] <- 0
      }
      
     
      # generate hourly data for this day: status follows the selected
      # profile; watts = target + unit noise on on-hours (floored at 1);
      # lumens decay exponentially with cumulative hours-on; voltage = target
      # + unit noise; temperature samples one of the 21 candidate profiles.
      all_status[((d-1)*24 + 1):(d*24), names(all_status) == currentBulb] <- day_add
      all_time_on[, names(all_time_on) == currentBulb] <- sum(all_status[,names(all_status) == currentBulb])
      all_watts[((d-1)*24 + 1):(d*24), names(all_watts) == currentBulb] <- pmax((watt_target * day_add)+round(rnorm(24,0,1)*day_add),1)*day_add
      all_lumens[((d-1)*24 + 1):(d*24), names(all_lumens) == currentBulb] <- round(lumens_target*exp(-(all_time_on[, names(all_time_on) == currentBulb]/(24*365))*lumens_decay_factor)*day_add,1)
      all_voltage[((d-1)*24 + 1):(d*24), names(all_voltage) == currentBulb] <- voltage_target*day_add+round(rnorm(24,0,1)*day_add)
      all_temps[((d-1)*24 + 1):(d*24), names(all_temps) == currentBulb] <- ifelse(daily_dates[d] >= smartBulbFixtureWarehouseJoin[smartBulbFixtureWarehouseJoin$SN == currentBulb,]$Start,
                                                                                  final_temps[,floor(runif(1, 1,22))],rep(0,24))
      
      #all_status$currentBulb[((d-1)*24 + 1):(d*24)] <- day_add
      #all_time_on$currentBulb <- sum(all_status$currentBulb)
      #all_watts$currentBulb[((d-1)*24 + 1):(d*24)] <- pmax((watt_target * day_add)+round(rnorm(24,0,1)*day_add),1)*day_add
      #all_lumens$currentBulb[((d-1)*24 + 1):(d*24)] <- round(lumens_target*exp(-(all_time_on$currentBulb/(24*365))*lumens_decay_factor)*day_add,1)
      #all_voltage$currentBulb[((d-1)*24 + 1):(d*24)] <- voltage_target*day_add+round(rnorm(24,0,1)*day_add)
      #all_temps$currentBulb[((d-1)*24 + 1):(d*24)] <- final_temps[,floor(runif(1, 1,22))]
      
      # determine if the lightbulb is dead or not: dead when cumulative
      # hours-on AND trailing-week switch count AND trailing wattage std-dev
      # all exceed the bulb's death_* thresholds. Only evaluated after the
      # first week, once a full trailing window exists.
      if (d > 7){
        all_trail_watts <- all_watts[(24*d-trailingSD):(24*d),currentBulb]
        non_zero <- all_trail_watts[all_trail_watts != 0]
        stdev_metric <- sd(non_zero)
        #total_switch <- sum(abs(diff(all_status[, names(all_status) == currentBulb])))
        trail_switch <- sum(abs(diff(all_status[(24*d-trailingSD):(24*d), names(all_status) == currentBulb])))
        all_alive[, names(all_alive) == currentBulb] <- 1- ((all_time_on[, names(all_time_on) == currentBulb] >= smartBulbFixtureWarehouseJoin[
          smartBulbFixtureWarehouseJoin$SN == currentBulb, ]$death_total) & 
            (trail_switch >= smartBulbFixtureWarehouseJoin[smartBulbFixtureWarehouseJoin$SN == currentBulb, ]$death_trailing) & 
            (stdev_metric>= smartBulbFixtureWarehouseJoin[smartBulbFixtureWarehouseJoin$SN == currentBulb, ]$death_sd) )
        
        #all_trail_watts$currentBulb <- all_watts$currentBulb[(length(all_watts$currentBulb)-trailingSD):length(all_watts$currentBulb)]
        #non_zero <- all_trail_watts$currentBulb[all_trail_watts$currentBulb != 0]
        #stdev_metric <- sd(non_zero)
        #total_switch <- sum(abs(diff(all_status$currentBulb)))
        #trail_switch <- sum(abs(diff(all_status$currentBulb[(length(all_status$currentBulb)-trailingSD):length(all_status$currentBulb)])))
        #all_alive$currentBulb <- 1- ((all_time_on$currentBulb >= smartBulbFixtureWarehouseJoin[
        #  smartBulbFixtureWarehouseJoin$SN == currentBulb, ]$death_total) & 
        #    (trail_switch >= smartBulbFixtureWarehouseJoin[smartBulbFixtureWarehouseJoin$SN == currentBulb, ]$death_trailing) & 
        #    (stdev_metric>= smartBulbFixtureWarehouseJoin[smartBulbFixtureWarehouseJoin$SN == currentBulb, ]$death_sd) )
        
        # if bulb is dead, add it to the bulbs to replace list and add 1 to the demand, and then modify measurements to represent the fact that the lightbulb died
        if (all_alive[, names(all_alive) == currentBulb] == 0){
          thisInventoryItem$bulbsToReplace <- c(thisInventoryItem$bulbsToReplace, currentBulb)
          thisInventoryItem$Table[d,]$Demand <- thisInventoryItem$Table[d,]$Demand + 1
          
          # zero out the lumens reading of the last hour the bulb was on
          all_lumens[tail(which(all_status[,names(all_status) == currentBulb] > 0), n=1), names(all_lumens) == currentBulb] <- 0
        }
      }

    }
    
    ## end of day calculations for this InventoryItem
    
    # creating bulbs according to the number of arrivals and adding them to the list of bulbs
    num_arrivals <- thisInventoryItem$Table[d,]$Arrivals
    if (num_arrivals > 0){            # necessary because of behavior of r that counts two elements in 1:0
      for (k in 1:num_arrivals){
        arrived_bulb <- generate_lightbulb(manufacturer = thisInventoryItem$Manufacturer, 
                                           bulbtype = thisInventoryItem$BulbType, 
                                           warehouse = thisInventoryItem$Warehouse,
                                           manufacturedDate = daily_dates[d-expLeadTime])
        smartBulbFixtureWarehouseJoin <- rbind(smartBulbFixtureWarehouseJoin, arrived_bulb$Join)
      }
    }
    
    # adding the arrival number to the InventoryLevel
    if(d == 1){
      thisInventoryItem$Table[d,]$InventoryLevel <- thisInventoryItem$InitialInventoryLevel + num_arrivals
    } else {
      thisInventoryItem$Table[d,]$InventoryLevel <- thisInventoryItem$Table[d-1,]$InventoryLevel + num_arrivals
    }
    
    # updating replacements metric: replace as many dead bulbs as stock allows
    if (thisInventoryItem$Table[d,]$InventoryLevel >= thisInventoryItem$Table[d,]$Demand){
      thisInventoryItem$Table[d,]$Replacements <- thisInventoryItem$Table[d,]$Demand
    } else {
      thisInventoryItem$Table[d,]$Replacements <- thisInventoryItem$Table[d,]$InventoryLevel
    }
    
    # updating inventoryLevel metric and demand metric for next day (changes if inventory insufficient)
    thisInventoryItem$Table[d,]$InventoryLevel <- thisInventoryItem$Table[d,]$InventoryLevel - thisInventoryItem$Table[d,]$Replacements
    thisInventoryItem$Table[d+1,]$Demand <- thisInventoryItem$Table[d,]$Demand - thisInventoryItem$Table[d,]$Replacements
    
    # replacing all the dead bulbs that I can: stamp the dead bulb's End, pull
    # the first never-installed bulb of the same item from warehouse stock
    # (is.na(Start)) and install it in the dead bulb's fixture at next midnight.
    if (thisInventoryItem$Table[d,]$Replacements > 0){
      for (deadbulb in thisInventoryItem$bulbsToReplace[1:thisInventoryItem$Table[d,]$Replacements]){
        smartBulbFixtureWarehouseJoin[smartBulbFixtureWarehouseJoin$SN == deadbulb,]$End <- as.POSIXct(hourly_dates[(d+1)*24],origin="1970-01-01", tz = 'GMT')
        replacement_bulb_index <- which(smartBulbFixtureWarehouseJoin$Manufacturer == thisInventoryItem$Manufacturer &
                                          smartBulbFixtureWarehouseJoin$BulbType == thisInventoryItem$BulbType &  
                                          smartBulbFixtureWarehouseJoin$warehouse == thisInventoryItem$Warehouse &
                                          is.na(smartBulbFixtureWarehouseJoin$Start))[1]
        smartBulbFixtureWarehouseJoin[replacement_bulb_index, ]$building  <- 
          smartBulbFixtureWarehouseJoin[smartBulbFixtureWarehouseJoin$SN == deadbulb,]$building 
        smartBulbFixtureWarehouseJoin[replacement_bulb_index, ]$fixture  <- 
          smartBulbFixtureWarehouseJoin[smartBulbFixtureWarehouseJoin$SN == deadbulb,]$fixture 
        smartBulbFixtureWarehouseJoin[replacement_bulb_index, ]$apartment  <-
          smartBulbFixtureWarehouseJoin[smartBulbFixtureWarehouseJoin$SN == deadbulb,]$apartment
        smartBulbFixtureWarehouseJoin[replacement_bulb_index, ]$Start <- as.POSIXct(hourly_dates[(d+1)*24],origin="1970-01-01", tz = 'GMT')
        smartBulbFixtureWarehouseJoin[replacement_bulb_index, ]$End <- 0
        
        # remove deadbulb from bulbsToReplace
        thisInventoryItem$bulbsToReplace <- thisInventoryItem$bulbsToReplace[! thisInventoryItem$bulbsToReplace %in% c(deadbulb)]
      }
    }
    
    
    # order Bulbs if necessary, and update orders accordingly: order 1-3
    # manufacturer batches and schedule the arrival expLeadTime days out.
    if (thisInventoryItem$Table[d,]$InventoryLevel < minInvLevel) {
      orderQuantity <- floor(runif(1,1,4))* manufacturerFile[manufacturerFile$Manufacturer == thisInventoryItem$Manufacturer,]$orderBatchSize
      thisInventoryItem$Table[d+expLeadTime,]$Arrivals <- thisInventoryItem$Table[d+expLeadTime,]$Arrivals + orderQuantity
      orderContentID = nrow(orders) + 1
      bulbType = thisInventoryItem$BulbType
      new_row <- data.frame(orderContentID = nrow(orders) + 1)
      new_row$bulbType <- thisInventoryItem$BulbType
      new_row$quantity <- orderQuantity
      new_row$orderDate <- daily_dates[d]
      new_row$manufacturerWarehouseRelation <- manufacturerWarehouseDF[
        manufacturerWarehouseDF$Manufacturer == thisInventoryItem$Manufacturer & 
          manufacturerWarehouseDF$Warehouse == thisInventoryItem$Warehouse,]$id
      new_row$manufacturer <- thisInventoryItem$Manufacturer
      new_row$warehouse <- thisInventoryItem$Warehouse
      new_row$orderID <- 0                     
      orders <- rbind(orders, new_row)
    }
    # write the locally-modified item back into the list (extracting from a
    # list copies, so mutations to thisInventoryItem are invisible otherwise)
    inventoryItems[[i]] <- thisInventoryItem
  }
  print(paste0('end of day',d))
}

# End currently mixes 0 ("still installed") with numeric POSIXct removal
# timestamps; convert the whole column to POSIXct in one shot.
smartBulbFixtureWarehouseJoin$End <- as.POSIXct(smartBulbFixtureWarehouseJoin$End, origin="1970-01-01", tz = 'GMT')

# select columns from smartBulbFixtureWarehouseJoin to create Smartbulb.csv file and SmartBulbFixture.csv file

smartBulbFile <- smartBulbFixtureWarehouseJoin[,c('Manufacturer', 'BulbType','Wattage','SN', 'manufacturedDate', 'warehouse')]
smartBulbFile$manufacturedDate <- strftime(smartBulbFile$manufacturedDate, "%Y-%m-%dT%H:%M:%S%z")
# Warehouse arrival is modeled as one day after manufacture.
smartBulbFile$warehouseArrivalDate <- smartBulbFixtureWarehouseJoin$manufacturedDate + 60*60*24
smartBulbFile$warehouseArrivalDate <- strftime(smartBulbFile$warehouseArrivalDate, "%Y-%m-%dT%H:%M:%S%z")
write.csv(smartBulbFile, file = "SmartBulb_reduced.csv",row.names=F)

# Keep only bulbs assigned to a fixture.
# NOTE(review): this relies on rbind-ing fixture = 0 into a factor column
# producing NA (pre-4.0 stringsAsFactors behavior); under R >= 4 the fixture
# column is character and warehouse rows carry "0", not NA — confirm the
# target R version.
smartBulbFixtureFile <- smartBulbFixtureWarehouseJoin[,c('fixture','SN','Start','End')]
smartBulbFixtureFile <- smartBulbFixtureFile[!is.na(smartBulbFixtureFile$fixture),]
smartBulbFixtureFile$Start <- strftime(smartBulbFixtureFile$Start, "%Y-%m-%dT%H:%M:%S%z")
smartBulbFixtureFile$End <- strftime(smartBulbFixtureFile$End, "%Y-%m-%dT%H:%M:%S%z")
write.csv(smartBulbFixtureFile, file = "SmartBulbFixture_reduced.csv",row.names=F)

#smartBulbFixtureFileAdjusted <- smartBulbFixtureFile
#smartBulbFixtureFileAdjusted <- smartBulbFixtureFileAdjusted[!is.na(smartBulbFixtureFileAdjusted$Start),]
#write.csv(smartBulbFixtureFileAdjusted, file = "SmartBulbFixtureAdjustedJune1.csv",row.names=F)

# reshape measurement dataframes and cbind them together to create SmartBulbMeasurement.csv file

# Helper: melt one wide table (TS x one column per bulb) into long form and
# label the value column.
melt_measure <- function(wide_df, value_name) {
  long_df <- melt(wide_df, id = c("TS"))
  names(long_df) <- c('TS', 'SN', value_name)
  long_df
}

reshaped_status <- melt_measure(all_status, 'Status')
reshaped_watts <- melt_measure(all_watts, 'Watts')
reshaped_lumens <- melt_measure(all_lumens, 'Lumens')
reshaped_voltage <- melt_measure(all_voltage, 'Voltage')
reshaped_temps <- melt_measure(all_temps, 'Temp')

# All melts share the same row order, so value columns can be copied across.
smartBulbMeasurementFile <- reshaped_status
smartBulbMeasurementFile$Watts <- reshaped_watts$Watts
smartBulbMeasurementFile$Lumens <- reshaped_lumens$Lumens
smartBulbMeasurementFile$Voltage <- reshaped_voltage$Voltage
smartBulbMeasurementFile$Temp <- reshaped_temps$Temp
# Drop rows with no timestamp and hours with zero temperature, then format
# timestamps as ISO-8601.
smartBulbMeasurementFile <- smartBulbMeasurementFile[!is.na(smartBulbMeasurementFile$TS),]
smartBulbMeasurementFile <- smartBulbMeasurementFile[!(smartBulbMeasurementFile$Temp == 0),]
smartBulbMeasurementFile$TS <- strftime(smartBulbMeasurementFile$TS, "%Y-%m-%dT%H:%M:%S%z")
  
write.csv(smartBulbMeasurementFile, file = "SmartBulbMeasurement_reduced.csv", row.names=F)

# Create Order.csv and OrderContent.csv from orders

# Order contents placed the same day for the same manufacturer/warehouse
# relation belong to one order. The original O(n^2) pairwise loop resolved
# each row's orderID to the ID of the group's FIRST row; match() against a
# (relation, date) key returns exactly that first-occurrence index in O(n).
# The guard also fixes the 1:nrow(orders) crash when no orders were placed.
if (nrow(orders) > 0) {
  order_key <- paste(orders$manufacturerWarehouseRelation, orders$orderDate)
  orders$orderID <- match(order_key, order_key)
}

orders$orderID <- paste0('ord',orders$orderID)
orders$orderContentID <- paste0('ordCont',orders$orderContentID)

# OrderContent: one row per order line item.
orderContentFile <- orders[,c('orderContentID','bulbType','quantity', 'orderID')]
write.csv(orderContentFile, file = "OrderContent_reduced.csv", row.names=F)

# Order: one row per distinct orderID; every simulated order is expedited and
# pre-approved.
orderFile <- subset(orders,!duplicated(orders$orderID))
orderFile <- orderFile[,c('orderID', 'orderDate','manufacturer','warehouse')]
orderFile$orderType <- 'expedited'
orderFile$orderApprovalStatus <- 'approved'
orderFile$orderDate <- strftime(orderFile$orderDate, "%Y-%m-%dT%H:%M:%S%z")
write.csv(orderFile, file = "Order_reduced.csv", row.names=F)

# ManufacturerWarehouse: drop the internal id column before writing.
manufacturerWarehouseFile <- manufacturerWarehouseDF[, -1]
write.csv(manufacturerWarehouseFile, file = "ManufacturerWarehouse_reduced.csv",row.names=F)


# NOTE(review): orderFileNew is built (orders minus orderContentID) but never
# written to disk — looks like leftover/experimental code.
orderFileNew <- orders[,-1]
orderFileNew$orderType <- 'expedited'
orderFileNew$orderApprovalStatus <- 'approved'
orderFileNew$orderDate <- strftime(orderFileNew$orderDate, "%Y-%m-%dT%H:%M:%S%z")
0 Likes

#4

Here is an example JS Batch Job that creates 5 million fake MeasurementSeries:

function doStart(job) {
	// Fan out 5000 batches; each batch carries its index (as a string) in
	// batchContext so processBatch can derive unique ids from it.
	var batchCount = 5000;
	for (var batchIdx = 0; batchIdx < batchCount; batchIdx++) {
		var batchSpec = JSBatchJobBatch.make({batchContext: batchIdx.toString() });
		JSBatchJob.scheduleBatch(job, batchSpec);
	}
}

function processBatch(batch, job, options) {

	var log = C3.logger("ManufactureIntervalSeries");
	log.info("dataload_batch : " + batch.batchContext);

	// Upsert 1000 fake series for this batch. Each id is 'FakeSeries_' plus a
	// 4-digit zero-padded batch index and a 3-digit zero-padded series index.
	var batchPart = ('0000' + batch.batchContext).substr(-4,4);
	var pmsArr = [];
	for (var k = 0; k < 1000; k++) {
		var seriesPart = ('0000' + k).substr(-3,3);
		pmsArr.push(PhysicalMeasurementSeries.make({id: 'FakeSeries_' + batchPart + seriesPart}));
	}
	PhysicalMeasurementSeries.upsertBatch(pmsArr);
}

// Batch-job spec: the functions are serialized to strings for remote
// execution; doNotAbortOnError lets remaining batches run if one fails.
var spec = JSBatchJobSpec.make({
	doStart: doStart.toString(),
	processBatch: processBatch.toString(),
	doNotAbortOnError: true
})

var job = JS.batchJob(spec);

Here is an example JS MapReduce job that generates a week of interval data for all the series in the system:

// Map step: for every series in the batch, create one week of 15-minute
// interval Measurements starting 2017-12-25 with random values.
var map = function(batch, objs, job) {
	var readings = C3.typesys.Arry.fromReference(Measurement, []);
	var x = 15; //minutes interval
	var tt = new Date("2017-12-25T00:00:00"); // start time
	objs.each(function(pms) {
		for (var j = 0; j < 96 * 7; j++) { // 96 intervals/day * 7 days
			// BUG FIX: `date + number` coerces the Date to a string and
			// concatenates, yielding an Invalid Date. Use getTime() for
			// millisecond arithmetic instead.
			var start = new Date(tt.getTime() + j * x * 60000);
			var end = new Date(start.getTime() + x * 60000);
			var r = Measurement.make({
				parent: pms,
				start: start,
				end: end,
				quantity: {value: Math.random() * 100}
			});
			readings.push(r);
		}
	});
	Measurement.createBatch(readings);
}

// Map-reduce spec: run `map` over every PhysicalMeasurementSeries whose id
// marks it as fake, 100 series per batch; limit -1 means no cap.
var spec = JSMapReduceSpec.make({
	targetType: PhysicalMeasurementSeries,
	include: "id",
	filter: "startsWith(id,'FakeSeries_')", // modify filter as appropriate
	limit: -1,
	batchSize: 100,
	map: map
});

var mrj = JS.mapReduce(spec);
0 Likes

#5

Here are the different options, including through some scripted methods:

Best Practice for Creating sample data for a UI under development
Within the type system package hierarchy, what is the best way to repeatably create data (across multiple tags) to enable UI development before the application has full data loaded (or created), or to create a demo application that never anticipates actually loading any data from external sources?

Currently supported options include:

  1. creating a new, samplePackage that depends on the package containing the data-model-types and application logic. This samplePackage has seed/ directory contents that seed the package: upside: super repeatable. Does not require script execution. downside: data lives on if not wiped out properly before connecting to external data sources. Also, it seems like Cassandra types cannot be implemented as seed data. NOTE: it is not suggested to have more than ~100 records being loaded into a given type DURING provisioning through seed data, as this causes provisioning to slow down.

  2. adding a test/seed/ directory that contains data seeding the app for the UI. Upside: super repeatable, data only shows up when provisioning with -E. Does not require script execution. downside: (same as option 1)

  3. Create scripts that add and remove data for the UI (as Yaro and Steve mention). This is the Best option for high volumes of data. Upside: repeatable. Downside: Wipe out needs to be executed or scripted before real data loads.

  4. Mask the api’s the UI uses by hardcoding the api responses to have dummy data with identical structures. (not recommended, as this takes more work and provisioning to undo)

The Platform team is currently working on a sample directory that will sit alongside test, src, seed, and ui directories. When this gets released, this will replace options 1 and 2.

0 Likes

#6

@ColumbusL , might it be possible to write a jasmine test that is paused while running?

The test could be constructed to create all the data, then encounter a debugger statement that would pause the testing process.

Then, you could interact with the application with this generated data existing.

Once done, you could resume the Jasmine test - where the teardown function would remove the data (provided it is still within the context).

0 Likes

#7

Yes, that sounds like a nice variation on option 3, with an added benefit of being able to teardown() the context of the test. Note that any data created through interactions with the Test-created data will live on: For example, if the TestApi setup creates an Asset, and in the course of some development work or UI testing interactions, the status of that asset changes, then the TimedValueHistory record that persists the change in the status of that asset will continue to exist after the test is torn down.

0 Likes

#8

I have this handy script I use to completely remove data (all instances and all SourceFiles/Uploaded) from a tag. It requires TenantAdmin privileges (to delete and re-create the tags).

Note that this is configured to operate on vanity urls configured for the stage-training2 or stage-training3 environment and the lightbulb tenant, but this could easily be configured for your needs.

// Template: fill in the three values below before running. As written, the
// blank initializers are intentional placeholders and will not parse.
var env = ;  // fill in `2` for stage-training2, or `3` for stage-training3
var startAt = ; // example: `23` means start at tag `dev23`
var endBefore = ; // example: `25` means erase `dev23` and `dev24`, but do not continue to `dev25`
var summary = {}; // summary object to report results
// begin loop
for(i=startAt; i<endBefore; i++){ // set the loop based on above values
   var x = 1; // counter. NOTE(review): reset to 1 each iteration, so summary[tag] is always 1 — looks like an unfinished error counter

   tag = "dev" + i.toString(); // generate the tag name, ex: `dev23`, based on the inputs

       try {  // try block to handle any errors
           console.log("Removing " + tag);
           res = Tag.remove(tag); // remove the tag (drops all metadata and seed data)
           console.log("Removed " + tag + " | Result: " + res ); // "true" returned if successful
           console.log("Creating " + tag);
           res = Tag.create(tag);  // re-create the tag as a vanilla tag; object returned if successful
           console.log("Created " + tag + " | Result:" );
           console.log(res);
	c3SwitchAll("lightbulb", tag) // switch contexts to recently created tag, so commands are executed against this tag
	console.log("Removing S3 Files on " + tag + " (on lightbulb tenant)" );
	delFiles = S3.deleteFiles("s3://c3.prod--stage-training"+env+"/fs/lightbulb/"+ tag + "/",true) //delete ALL files from the S3 bucket associated with the Tag
	console.log("Removed " + delFiles + " S3 files"); // output the number of files deleted

       }
       catch(err) {
           summary[tag] = x;  // collect tags that had errors
           x = x + 1
       }

}
console.log("Done. Summary variable below");
console.log(summary); // output summary

After specifying the tags and env, this script does Tag.remove() and Tag.create(), which should remove ALL metadata and seed data in the tag, then re-create it as a “vanilla”.

Then, it switches context to the recently created tag and removes all SourceFiles.

Note: This can’t be run from the tag you are cleaning. Ex: If you want to clean the dev1 tag, you have to execute this script from any other tag.

0 Likes