library(lavaan)
library(lavaanPlot)
library(fastDummies)
library(ggcorrplot)
library(candisc)
Az adatok betöltése:
# Filtering, tidying and building the final analysis table.
# Read the raw export; text columns stay character rather than factor.
data <- read.csv(
  file = "C:/Users/Dell/Downloads/20230403.csv",
  stringsAsFactors = FALSE
)
# Drive-type filter: keep only front-, rear- and all-wheel-drive rows.
hajtas_tipus <- c("Első kerék", "Hátsó kerék", "Összkerék")
hajtas_ok <- data$Hajtás %in% hajtas_tipus
data <- data[hajtas_ok, ]
# Collapse the free-text air-conditioning description into three classes.
# Priority when several keywords occur: digital > automatic > manual.
klima_raw <- data$Klíma.fajtája
klima_digit <- grepl("digitális", klima_raw, ignore.case = TRUE)
klima_auto <- grepl("automata", klima_raw, ignore.case = TRUE)
klima_manual <- grepl("manuális", klima_raw, ignore.case = TRUE)
data$Klima <- ifelse(
  klima_digit,
  "Digitális",
  ifelse(klima_auto, "Automata", ifelse(klima_manual, "Manuális", NA))
)
# Rows that matched none of the keywords are dropped.
klima_known <- !is.na(data$Klima)
data <- data[klima_known, ]
# Fuel filter: keep rows whose fuel text mentions petrol, diesel or electric.
fuel_keep <- grepl("Benzin|Dízel|Elektromos", data$Üzemanyag., ignore.case = TRUE)
data <- data[fuel_keep, ]
# Reduce the fuel text to exactly three labels.
# Priority when several keywords occur: Benzin > Dízel > Elektromos.
fuel_raw <- data$Üzemanyag.
fuel_benzin <- grepl("Benzin", fuel_raw, ignore.case = TRUE)
fuel_dizel <- grepl("Dízel", fuel_raw, ignore.case = TRUE)
fuel_elektromos <- grepl("Elektromos", fuel_raw, ignore.case = TRUE)
data$uzemanyag <- ifelse(
  fuel_benzin,
  "Benzin",
  ifelse(fuel_dizel, "Dízel", ifelse(fuel_elektromos, "Elektromos", NA))
)
# Outlier filter on seat count: drop every row whose seat count is one of the
# three largest distinct values present in the data.
# The column is addressed by its full name (with the trailing dot) instead of
# relying on `$` partial matching, which silently returns NULL as soon as a
# second column shares the same prefix.
szemszam <- data[["Szállítható.szem..száma."]]
# head() instead of [1:3]: with fewer than three distinct values [1:3] pads
# the vector with NA, and `%in%` would then also drop rows with a missing count.
szemszam_outlier <- head(sort(unique(szemszam), decreasing = TRUE), 3)
data <- data[!(szemszam %in% szemszam_outlier), ]
# Keep only the 22 brands with the most listings.
# head() instead of [1:22]: when fewer than 22 brands exist, [1:22] pads the
# name vector with NA, and `%in%` would then retain rows with a missing brand.
marka_gyakorisag <- sort(table(data$márka), decreasing = TRUE)
nepszeru_marka <- names(head(marka_gyakorisag, 22))
data <- data[data$márka %in% nepszeru_marka, ]
# Gearbox: classify the description as automatic or manual.
# "automata" wins when both keywords occur.
valto_raw <- data$Sebességváltó
valto_auto <- grepl("automata", valto_raw, ignore.case = TRUE)
valto_manual <- grepl("manuális", valto_raw, ignore.case = TRUE)
data$sebvalto <- ifelse(
  valto_auto,
  "automata",
  ifelse(valto_manual, "manualis", NA)
)
# Rows with neither keyword are removed.
valto_known <- !is.na(data$sebvalto)
data <- data[valto_known, ]
# Create the age variable: years elapsed between the model year and 2023.
data$kor <- 2023-data$Évjárat.
# Drop redundant variables. Columns are removed by POSITION, so this line
# depends on the exact column order of the CSV plus the columns added above.
data <- data[, -c(1,4,6,7,10,11,13,15,19,20,22)]
# Keep complete cases only.
data <- na.omit(data)
# Work with the natural logarithm of the purchase price: inspect its
# distribution, then store it as `price`.
hist(log(data$Vételár.))
data$price <- log(data$Vételár.)
# Positional drop again; it must run after `price` has been added.
data <- data[, -c(3,11)] # remove the original purchase price and the brand column
Sztenderdizálás és dummy-k
# Dummy-code the remaining character columns. The source columns are removed
# and the most frequent level of each variable is dropped as the reference.
car <- dummy_cols(
  data,
  remove_selected_columns = TRUE,
  remove_most_frequent_dummy = TRUE
)
# Standardize every column (dummies included). data.frame() also sanitises the
# column names, which is why the dummy names below contain dots, not spaces.
car_st <- data.frame(scale(car))

# Short ASCII names for the model syntax: old name -> new name.
# A name that is not present in car_st is simply skipped.
uj_nevek <- c(
  "Hengerűrtartalom." = "henger",
  "Teljesítmény." = "teljesitmeny",
  "Teljes.tömeg." = "tomeg",
  "Km..óra.állás." = "ut",
  "Szállítható.szem..száma." = "szemelyszam",
  "Csomagtartó." = "csomagtarto",
  "uzemanyag_Elektromos" = "elektromos",
  "uzemanyag_Dízel" = "dizel",
  "sebvalto_automata" = "automatasebvalto",
  "Állapot._Kitűnő" = "kituno",
  "Állapot._Sérülésmentes" = "serulesmentes",
  "Állapot._Újszerű" = "ujszeru",
  "Állapot._Megkímélt" = "megkimelt",
  "Hajtás._Összkerék" = "osszkerek",
  "Hajtás._Hátsó.kerék" = "hatsokerek",
  "Klima_Automata" = "automataklima",
  "Klima_Manuális" = "manualisklima"
)
for (regi_nev in names(uj_nevek)) {
  colnames(car_st)[colnames(car_st) == regi_nev] <- uj_nevek[[regi_nev]]
}

# Correlation heat map of a positional subset of the columns (1-8 and 25-31).
korr_oszlopok <- c(1:8, 25:31)
ggcorrplot(cor(car_st[, korr_oszlopok]))
Modellépítés
# Model 1: plain regression of (standardized) log price on power, mileage,
# age and weight. The model is saturated (0 degrees of freedom), so its
# perfect fit indices carry no information about model quality.
mod1 <- "price~teljesitmeny+ut+kor+tomeg"
# estimator = "MLF": maximum likelihood with first-order standard errors.
sem1 <- sem(mod1, data = car_st, estimator = "MLF")
# `fit.measures` is spelled out; the abbreviated `fit` only worked through
# partial argument matching.
summary(sem1, fit.measures = TRUE) #CFI:1.000; TLI:1.000; RMSEA:0.000
## lavaan 0.6-19 ended normally after 1 iteration
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 5
##
## Number of observations 52698
##
## Model Test User Model:
##
## Test statistic 0.000
## Degrees of freedom 0
##
## Model Test Baseline Model:
##
## Test statistic 107731.217
## Degrees of freedom 4
## P-value 0.000
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 1.000
## Tucker-Lewis Index (TLI) 1.000
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -20909.114
## Loglikelihood unrestricted model (H1) -20909.114
##
## Akaike (AIC) 41828.229
## Bayesian (BIC) 41872.591
## Sample-size adjusted Bayesian (SABIC) 41856.700
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.000
## 90 Percent confidence interval - lower 0.000
## 90 Percent confidence interval - upper 0.000
## P-value H_0: RMSEA <= 0.050 NA
## P-value H_0: RMSEA >= 0.080 NA
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.000
##
## Parameter Estimates:
##
## Standard errors Standard
## Information First.order
## Information saturated (h1) model Structured
##
## Regressions:
## Estimate Std.Err z-value P(>|z|)
## price ~
## teljesitmeny 0.389 0.001 365.919 0.000
## ut -0.080 0.000 -824.010 0.000
## kor -0.689 0.001 -605.822 0.000
## tomeg 0.066 0.000 622.137 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .price 0.129 0.000 1228.093 0.000
# Path diagram of sem1 with coefficient labels (sig = 0.05 threshold).
# `coefs` is the documented argument name; the abbreviated `coef` depended on
# partial argument matching.
lavaanPlot(sem1, coefs = TRUE, sig = 0.05)
# Model 2: adds automatic gearbox and diesel as price predictors, and lets
# age predict mileage and gearbox type, and weight predict diesel.
mod2 <- "price~teljesitmeny+ut+kor+tomeg+automatasebvalto+dizel
ut~kor
automatasebvalto~kor
dizel~tomeg"
sem2 <- sem(mod2, data = car_st, estimator = "MLF")
# `fit.measures` spelled out instead of the partially matched `fit`.
summary(sem2, fit.measures = TRUE) #CFI:0.843; TLI:0.686; RMSEA:0.234
## lavaan 0.6-19 ended normally after 1 iteration
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 13
##
## Number of observations 52698
##
## Model Test User Model:
##
## Test statistic 25889.827
## Degrees of freedom 9
## P-value (Chi-square) 0.000
##
## Model Test Baseline Model:
##
## Test statistic 164893.359
## Degrees of freedom 18
## P-value 0.000
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 0.843
## Tucker-Lewis Index (TLI) 0.686
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -229597.125
## Loglikelihood unrestricted model (H1) -216652.212
##
## Akaike (AIC) 459220.250
## Bayesian (BIC) 459335.590
## Sample-size adjusted Bayesian (SABIC) 459294.276
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.234
## 90 Percent confidence interval - lower 0.231
## 90 Percent confidence interval - upper 0.236
## P-value H_0: RMSEA <= 0.050 0.000
## P-value H_0: RMSEA >= 0.080 1.000
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.137
##
## Parameter Estimates:
##
## Standard errors Standard
## Information First.order
## Information saturated (h1) model Structured
##
## Regressions:
## Estimate Std.Err z-value P(>|z|)
## price ~
## teljesitmeny 0.331 0.002 207.029 0.000
## ut -0.089 0.002 -51.502 0.000
## kor -0.664 0.001 -524.020 0.000
## tomeg 0.046 0.003 15.761 0.000
## automatasebvlt 0.119 0.002 59.579 0.000
## dizel 0.022 0.002 11.786 0.000
## ut ~
## kor 0.522 0.005 106.386 0.000
## automatasebvalto ~
## kor -0.311 0.005 -64.490 0.000
## dizel ~
## tomeg 0.304 0.006 54.766 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .price 0.120 0.000 1265.745 0.000
## .ut 0.728 0.001 517.367 0.000
## .automatasebvlt 0.903 0.010 89.803 0.000
## .dizel 0.908 0.023 39.682 0.000
# Path diagram of sem2; `coefs` is the documented argument name (the
# abbreviated `coef` depended on partial argument matching).
lavaanPlot(sem2, coefs = TRUE, sig = 0.05)
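Bár mindegyik kapcsolat szignifikáns, a mutatók alapján a modell illeszkedése nem elfogadható (CFI: 0,843; TLI: 0,686; RMSEA: 0,234). Az első modellel való összevetés nem informatív, mert az telített (0 szabadságfok), így ott a tökéletes illeszkedés szükségszerű.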
# Model 3: model 2 plus the four condition dummies (kituno, megkimelt,
# serulesmentes, ujszeru) as additional price predictors.
mod3 <- "price~teljesitmeny+ut+kor+tomeg+automatasebvalto+dizel+
kituno+megkimelt+serulesmentes+ujszeru
ut~kor
automatasebvalto~kor
dizel~tomeg
"
sem3 <- sem(mod3, data = car_st, estimator = "MLF")
# `fit.measures` spelled out instead of the partially matched `fit`.
summary(sem3, fit.measures = TRUE) #CFI:0.838; TLI:0.738; RMSEA:0.157
## lavaan 0.6-19 ended normally after 2 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 17
##
## Number of observations 52698
##
## Model Test User Model:
##
## Test statistic 27267.907
## Degrees of freedom 21
## P-value (Chi-square) 0.000
##
## Model Test Baseline Model:
##
## Test statistic 168472.323
## Degrees of freedom 34
## P-value 0.000
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 0.838
## Tucker-Lewis Index (TLI) 0.738
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -228496.683
## Loglikelihood unrestricted model (H1) -214862.730
##
## Akaike (AIC) 457027.366
## Bayesian (BIC) 457178.196
## Sample-size adjusted Bayesian (SABIC) 457124.170
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.157
## 90 Percent confidence interval - lower 0.155
## 90 Percent confidence interval - upper 0.158
## P-value H_0: RMSEA <= 0.050 0.000
## P-value H_0: RMSEA >= 0.080 1.000
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.091
##
## Parameter Estimates:
##
## Standard errors Standard
## Information First.order
## Information saturated (h1) model Structured
##
## Regressions:
## Estimate Std.Err z-value P(>|z|)
## price ~
## teljesitmeny 0.325 0.002 203.947 0.000
## ut -0.080 0.002 -43.284 0.000
## kor -0.642 0.001 -496.102 0.000
## tomeg 0.045 0.003 15.767 0.000
## automatasebvlt 0.115 0.002 58.958 0.000
## dizel 0.024 0.002 13.126 0.000
## kituno 0.074 0.002 40.102 0.000
## megkimelt 0.048 0.002 27.128 0.000
## serulesmentes 0.035 0.002 18.578 0.000
## ujszeru 0.060 0.002 32.216 0.000
## ut ~
## kor 0.522 0.005 105.816 0.000
## automatasebvalto ~
## kor -0.311 0.005 -64.410 0.000
## dizel ~
## tomeg 0.304 0.006 53.674 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .price 0.115 0.000 1201.842 0.000
## .ut 0.728 0.001 524.397 0.000
## .automatasebvlt 0.903 0.010 89.675 0.000
## .dizel 0.908 0.023 39.226 0.000
# Path diagram of sem3; `coefs` is the documented argument name (the
# abbreviated `coef` depended on partial argument matching).
lavaanPlot(sem3, coefs = TRUE, sig = 0.05)
A TLI, az RMSEA és az SRMR javult az előző modellhez képest, a CFI viszont kissé romlott (0,843 → 0,838); az illeszkedés így sem elfogadható sajnos.
Látens változó a modellben –> az autó luxusfoka (luxus)
# Model 4: introduces the latent variable `luxus`, measured by power,
# automatic gearbox, seat count and engine displacement, as a price predictor.
# Power and automatic gearbox are both indicators and direct price predictors.
mod4 <- "luxus=~teljesitmeny+automatasebvalto+szemelyszam+henger
ut~kor
automatasebvalto~kor
dizel~tomeg
price~teljesitmeny+ut+kor+tomeg+automatasebvalto+dizel+luxus
"
# std.lv = TRUE fixes the latent variance to 1, so all loadings are estimated.
sem4 <- sem(mod4, data = car_st, estimator = "MLF", std.lv = TRUE)
# `fit.measures` spelled out instead of the partially matched `fit`.
summary(sem4, fit.measures = TRUE) #CFI:0.819; TLI:0.699; RMSEA:0.212
## lavaan 0.6-19 ended normally after 45 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 21
##
## Number of observations 52698
##
## Model Test User Model:
##
## Test statistic 49904.084
## Degrees of freedom 21
## P-value (Chi-square) 0.000
##
## Model Test Baseline Model:
##
## Test statistic 276133.184
## Degrees of freedom 35
## P-value 0.000
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 0.819
## Tucker-Lewis Index (TLI) 0.699
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -410308.510
## Loglikelihood unrestricted model (H1) -385356.468
##
## Akaike (AIC) 820659.019
## Bayesian (BIC) 820845.338
## Sample-size adjusted Bayesian (SABIC) 820778.600
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.212
## 90 Percent confidence interval - lower 0.211
## 90 Percent confidence interval - upper 0.214
## P-value H_0: RMSEA <= 0.050 0.000
## P-value H_0: RMSEA >= 0.080 1.000
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.152
##
## Parameter Estimates:
##
## Standard errors Standard
## Information First.order
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|)
## luxus =~
## teljesitmeny 0.907 0.004 217.281 0.000
## automatasebvlt 0.570 0.007 87.588 0.000
## szemelyszam -0.040 0.004 -9.896 0.000
## henger 0.930 0.003 285.723 0.000
##
## Regressions:
## Estimate Std.Err z-value P(>|z|)
## ut ~
## kor 0.522 0.005 105.445 0.000
## automatasebvalto ~
## kor -0.264 0.004 -66.019 0.000
## dizel ~
## tomeg 0.304 0.005 56.526 0.000
## price ~
## teljesitmeny 0.183 0.007 27.391 0.000
## ut -0.091 0.002 -53.013 0.000
## kor -0.694 0.002 -407.348 0.000
## tomeg 0.042 0.003 13.421 0.000
## automatasebvlt 0.098 0.002 45.333 0.000
## dizel 0.005 0.002 2.755 0.006
## luxus 0.173 0.007 24.158 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .teljesitmeny 0.178 0.003 53.909 0.000
## .automatasebvlt 0.580 0.005 123.478 0.000
## .szemelyszam 0.998 0.003 390.038 0.000
## .henger 0.134 0.003 39.293 0.000
## .ut 0.728 0.001 509.435 0.000
## .dizel 0.908 0.023 39.652 0.000
## .price 0.116 0.000 471.333 0.000
## luxus 1.000
# Path diagram of sem4; `coefs` is the documented argument name (the
# abbreviated `coef` depended on partial argument matching).
lavaanPlot(sem4, coefs = TRUE, sig = 0.05)
Nem igazán javult tőle (egyre rosszabb).
Másik látens változó bevezetése
# Model 5: replaces `luxus` with the latent variable `nagy` (size), measured
# by displacement, boot capacity, seat count and weight. Weight is both an
# indicator of `nagy` and a direct predictor of diesel and price.
mod5 <- "nagy=~henger+csomagtarto+szemelyszam+tomeg
ut~kor
automatasebvalto~kor
dizel~tomeg
price~teljesitmeny+ut+kor+tomeg+automatasebvalto+dizel+nagy
"
# std.lv = TRUE fixes the latent variance to 1, so all loadings are estimated.
sem5 <- sem(mod5, data = car_st, estimator = "MLF", std.lv = TRUE)
# `fit.measures` spelled out instead of the partially matched `fit`.
summary(sem5, fit.measures = TRUE) #CFI:0.561; TLI:0.356; RMSEA:0.283
## lavaan 0.6-19 ended normally after 29 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 22
##
## Number of observations 52698
##
## Model Test User Model:
##
## Test statistic 126249.160
## Degrees of freedom 30
## P-value (Chi-square) 0.000
##
## Model Test Baseline Model:
##
## Test statistic 287401.590
## Degrees of freedom 44
## P-value 0.000
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 0.561
## Tucker-Lewis Index (TLI) 0.356
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -517621.568
## Loglikelihood unrestricted model (H1) -454496.988
##
## Akaike (AIC) 1035287.136
## Bayesian (BIC) 1035482.327
## Sample-size adjusted Bayesian (SABIC) 1035412.411
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.283
## 90 Percent confidence interval - lower 0.281
## 90 Percent confidence interval - upper 0.284
## P-value H_0: RMSEA <= 0.050 0.000
## P-value H_0: RMSEA >= 0.080 1.000
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.208
##
## Parameter Estimates:
##
## Standard errors Standard
## Information First.order
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|)
## nagy =~
## henger 0.616 0.007 86.683 0.000
## csomagtarto 0.457 0.006 75.983 0.000
## szemelyszam 0.144 0.006 25.143 0.000
## tomeg 0.762 0.009 87.809 0.000
##
## Regressions:
## Estimate Std.Err z-value P(>|z|)
## ut ~
## kor 0.522 0.005 106.198 0.000
## automatasebvalto ~
## kor -0.311 0.005 -63.008 0.000
## dizel ~
## tomeg 0.304 0.019 16.378 0.000
## price ~
## teljesitmeny 0.293 0.002 123.038 0.000
## ut -0.093 0.002 -53.749 0.000
## kor -0.671 0.001 -477.848 0.000
## tomeg -0.040 0.008 -5.123 0.000
## automatasebvlt 0.114 0.002 56.968 0.000
## dizel 0.008 0.002 4.426 0.000
## nagy 0.142 0.008 17.887 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .henger 0.620 0.006 99.947 0.000
## .csomagtarto 0.791 0.006 143.326 0.000
## .szemelyszam 0.979 0.003 282.244 0.000
## .tomeg 0.419 0.010 41.057 0.000
## .ut 0.728 0.001 498.103 0.000
## .automatasebvlt 0.903 0.011 83.908 0.000
## .dizel 0.908 0.051 17.759 0.000
## .price 0.113 0.001 167.496 0.000
## nagy 1.000
# Path diagram of sem5; `coefs` is the documented argument name (the
# abbreviated `coef` depended on partial argument matching).
lavaanPlot(sem5, coefs = TRUE, sig = 0.05)
Erről ne is beszéljünk…
# Model 6: latent variable `regi` (old), measured by manual A/C, automatic
# gearbox, age and mileage. Age and mileage are indicators of `regi` and at
# the same time appear in the structural regressions.
mod6 <- " ut~kor
regi=~manualisklima+automatasebvalto+kor+ut
automatasebvalto~kor
dizel~tomeg
price~teljesitmeny+ut+kor+tomeg+automatasebvalto+dizel+regi
"
# std.lv = TRUE fixes the latent variance to 1, so all loadings are estimated.
sem6 <- sem(mod6, data = car_st, estimator = "MLF", std.lv = TRUE)
# `fit.measures` spelled out instead of the partially matched `fit`.
summary(sem6, fit.measures = TRUE) #CFI:0.813; TLI:0.612; RMSEA:0.225
## lavaan 0.6-19 ended normally after 46 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 20
##
## Number of observations 52698
##
## Model Test User Model:
##
## Test statistic 34663.976
## Degrees of freedom 13
## P-value (Chi-square) 0.000
##
## Model Test Baseline Model:
##
## Test statistic 185362.648
## Degrees of freedom 27
## P-value 0.000
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 0.813
## Tucker-Lewis Index (TLI) 0.612
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -373299.001
## Loglikelihood unrestricted model (H1) -355967.013
##
## Akaike (AIC) 746638.002
## Bayesian (BIC) 746815.448
## Sample-size adjusted Bayesian (SABIC) 746751.888
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.225
## 90 Percent confidence interval - lower 0.223
## 90 Percent confidence interval - upper 0.227
## P-value H_0: RMSEA <= 0.050 0.000
## P-value H_0: RMSEA >= 0.080 1.000
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.182
##
## Parameter Estimates:
##
## Standard errors Standard
## Information First.order
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|)
## regi =~
## manualisklima 0.810 0.031 26.515 0.000
## automatasebvlt -0.397 0.019 -20.722 0.000
## kor 0.331 0.014 23.736 0.000
## ut -0.157 0.009 -16.992 0.000
##
## Regressions:
## Estimate Std.Err z-value P(>|z|)
## ut ~
## kor 0.574 0.007 88.019 0.000
## automatasebvalto ~
## kor -0.180 0.012 -15.201 0.000
## dizel ~
## tomeg 0.304 0.005 61.213 0.000
## price ~
## teljesitmeny 0.316 0.002 193.147 0.000
## ut -0.099 0.002 -45.069 0.000
## kor -0.642 0.003 -210.268 0.000
## tomeg 0.041 0.003 14.240 0.000
## automatasebvlt 0.097 0.004 26.781 0.000
## dizel 0.012 0.002 6.652 0.000
## regi -0.087 0.006 -14.484 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .manualisklima 0.344 0.049 6.986 0.000
## .automatasebvlt 0.762 0.016 48.502 0.000
## .kor 0.891 0.010 84.836 0.000
## .ut 0.706 0.002 330.303 0.000
## .dizel 0.908 0.023 39.624 0.000
## .price 0.115 0.001 197.961 0.000
## regi 1.000
# Path diagram of sem6; `coefs` is the documented argument name (the
# abbreviated `coef` depended on partial argument matching).
lavaanPlot(sem6, coefs = TRUE, sig = 0.05)
Az illeszkedés továbbra sem elfogadható.
# Model 7: latent variable `nagy` measured by weight, displacement, power and
# diesel. The latent variable does not enter the price equation here, and
# diesel is both an indicator of `nagy` and regressed on weight.
mod7 <- "nagy=~tomeg+henger+teljesitmeny+dizel
ut~kor
automatasebvalto~kor
dizel~tomeg
price~teljesitmeny+ut+kor+tomeg+automatasebvalto
"
# std.lv = TRUE fixes the latent variance to 1, so all loadings are estimated.
sem7 <- sem(mod7, data = car_st, estimator = "MLF", std.lv = TRUE)
# `fit.measures` spelled out instead of the partially matched `fit`.
summary(sem7, fit.measures = TRUE) #CFI:0.803; TLI:0.655; RMSEA:0.253
## lavaan 0.6-19 ended normally after 29 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 19
##
## Number of observations 52698
##
## Model Test User Model:
##
## Test statistic 53807.994
## Degrees of freedom 16
## P-value (Chi-square) 0.000
##
## Model Test Baseline Model:
##
## Test statistic 273204.321
## Degrees of freedom 28
## P-value 0.000
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 0.803
## Tucker-Lewis Index (TLI) 0.655
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -413724.896
## Loglikelihood unrestricted model (H1) -386820.899
##
## Akaike (AIC) 827487.793
## Bayesian (BIC) 827656.367
## Sample-size adjusted Bayesian (SABIC) 827595.985
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.253
## 90 Percent confidence interval - lower 0.251
## 90 Percent confidence interval - upper 0.254
## P-value H_0: RMSEA <= 0.050 0.000
## P-value H_0: RMSEA >= 0.080 1.000
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.179
##
## Parameter Estimates:
##
## Standard errors Standard
## Information First.order
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|)
## nagy =~
## tomeg 0.475 0.007 72.234 0.000
## henger 0.985 0.005 191.145 0.000
## teljesitmeny 0.857 0.005 160.952 0.000
## dizel 0.210 0.014 15.376 0.000
##
## Regressions:
## Estimate Std.Err z-value P(>|z|)
## ut ~
## kor 0.522 0.005 105.286 0.000
## automatasebvalto ~
## kor -0.311 0.005 -60.641 0.000
## dizel ~
## tomeg 0.204 0.013 15.515 0.000
## price ~
## teljesitmeny 0.330 0.002 166.085 0.000
## ut -0.082 0.002 -47.793 0.000
## kor -0.667 0.001 -520.362 0.000
## tomeg 0.052 0.003 18.454 0.000
## automatasebvlt 0.121 0.002 60.319 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .tomeg 0.774 0.003 280.956 0.000
## .henger 0.030 0.006 5.055 0.000
## .teljesitmeny 0.266 0.005 58.653 0.000
## .dizel 0.873 0.048 18.341 0.000
## .ut 0.728 0.001 561.367 0.000
## .automatasebvlt 0.903 0.011 82.820 0.000
## .price 0.120 0.000 1232.742 0.000
## nagy 1.000
# Path diagram of sem7; `coefs` is the documented argument name (the
# abbreviated `coef` depended on partial argument matching).
lavaanPlot(sem7, coefs = TRUE, sig = 0.05)
# Pearson correlation between displacement and weight, two indicators of `nagy`.
cor(car_st$henger, car_st$tomeg)
## [1] 0.4627088
# Model 8: model 6 without the automatasebvalto~kor regression and without
# diesel in the price equation. Diesel stays endogenous (dizel~tomeg); the
# fitted model reports a residual covariance between dizel and price.
mod8 <- " ut~kor
regi=~manualisklima+automatasebvalto+kor+ut
dizel~tomeg
price~teljesitmeny+ut+kor+tomeg+automatasebvalto+regi
"
# std.lv = TRUE fixes the latent variance to 1, so all loadings are estimated.
sem8 <- sem(mod8, data = car_st, estimator = "MLF", std.lv = TRUE)
# `fit.measures` spelled out instead of the partially matched `fit`.
summary(sem8, fit.measures = TRUE) #CFI:0.812; TLI:0.637; RMSEA:0.218
## lavaan 0.6-19 ended normally after 29 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 19
##
## Number of observations 52698
##
## Model Test User Model:
##
## Test statistic 34917.790
## Degrees of freedom 14
## P-value (Chi-square) 0.000
##
## Model Test Baseline Model:
##
## Test statistic 185362.648
## Degrees of freedom 27
## P-value 0.000
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 0.812
## Tucker-Lewis Index (TLI) 0.637
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -373425.908
## Loglikelihood unrestricted model (H1) -355967.013
##
## Akaike (AIC) 746889.816
## Bayesian (BIC) 747058.390
## Sample-size adjusted Bayesian (SABIC) 746998.008
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.218
## 90 Percent confidence interval - lower 0.216
## 90 Percent confidence interval - upper 0.219
## P-value H_0: RMSEA <= 0.050 0.000
## P-value H_0: RMSEA >= 0.080 1.000
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.183
##
## Parameter Estimates:
##
## Standard errors Standard
## Information First.order
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|)
## regi =~
## manualisklima 0.589 0.007 79.644 0.000
## automatasebvlt -0.629 0.009 -73.394 0.000
## kor 0.477 0.007 73.037 0.000
## ut -0.174 0.009 -19.861 0.000
##
## Regressions:
## Estimate Std.Err z-value P(>|z|)
## ut ~
## kor 0.605 0.006 97.856 0.000
## dizel ~
## tomeg 0.304 0.005 61.220 0.000
## price ~
## teljesitmeny 0.316 0.002 193.187 0.000
## ut -0.110 0.002 -54.115 0.000
## kor -0.599 0.003 -200.946 0.000
## tomeg 0.044 0.003 16.868 0.000
## automatasebvlt 0.039 0.004 10.703 0.000
## regi -0.175 0.006 -29.080 0.000
##
## Covariances:
## Estimate Std.Err z-value P(>|z|)
## .dizel ~~
## .price 0.011 0.002 6.542 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .manualisklima 0.654 0.010 63.542 0.000
## .automatasebvlt 0.605 0.010 61.808 0.000
## .kor 0.772 0.007 115.294 0.000
## .ut 0.704 0.002 361.735 0.000
## .dizel 0.908 0.023 39.626 0.000
## .price 0.105 0.001 122.480 0.000
## regi 1.000
# Path diagram of sem8; `coefs` is the documented argument name (the
# abbreviated `coef` depended on partial argument matching).
lavaanPlot(sem8, coefs = TRUE, sig = 0.05)
Modifikációs index
# Ten largest modification indices of sem8, sorted in decreasing order.
# The lavaan argument is `sort.` (with a trailing dot); `sort` only matched it
# partially.
modindices(sem8, sort. = TRUE, maximum.number = 10)
## lhs op rhs mi epc sepc.lv sepc.all
## 78 regi ~ price 19379.131 -2.302 -2.302 -2.085
## 81 regi ~ teljesitmeny 18258.341 -0.758 -0.758 -0.758
## 68 teljesitmeny ~ automatasebvalto 11416.728 0.413 0.413 0.413
## 69 teljesitmeny ~ regi 10838.724 -0.518 -0.518 -0.518
## 75 automatasebvalto ~ teljesitmeny 10245.536 0.398 0.398 0.398
## 72 automatasebvalto ~ price 8752.407 0.899 0.899 0.814
## 80 regi ~ tomeg 8602.663 -0.520 -0.520 -0.520
## 38 ut ~ dizel 4031.779 0.234 0.234 0.234
## 65 teljesitmeny ~ price 4029.860 0.319 0.319 0.289
## 36 ut ~~ dizel 3684.996 0.213 0.213 0.267
## sepc.nox
## 78 -2.085
## 81 -0.758
## 68 0.413
## 69 -0.518
## 75 0.398
## 72 0.814
## 80 -0.520
## 38 0.234
## 65 0.289
## 36 0.267
# Model 9: model 8 extended with paths taken from the modification indices of
# sem8: diesel predicts mileage, `regi` predicts power, and power covaries
# with automatic gearbox.
mod9 <- " ut~kor+dizel
regi=~manualisklima+automatasebvalto+kor+ut
dizel~tomeg
teljesitmeny~~automatasebvalto
teljesitmeny~regi
price~teljesitmeny+ut+kor+tomeg+automatasebvalto+regi
"
# std.lv = TRUE fixes the latent variance to 1, so all loadings are estimated.
sem9 <- sem(mod9, data = car_st, estimator = "MLF", std.lv = TRUE)
# `fit.measures` spelled out instead of the partially matched `fit`.
summary(sem9, fit.measures = TRUE) #CFI:0.911; TLI:0.808; RMSEA:0.161
## lavaan 0.6-19 ended normally after 35 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 22
##
## Number of observations 52698
##
## Model Test User Model:
##
## Test statistic 17686.765
## Degrees of freedom 13
## P-value (Chi-square) 0.000
##
## Model Test Baseline Model:
##
## Test statistic 197919.598
## Degrees of freedom 28
## P-value 0.000
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 0.911
## Tucker-Lewis Index (TLI) 0.808
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -433306.643
## Loglikelihood unrestricted model (H1) -424463.260
##
## Akaike (AIC) 866657.286
## Bayesian (BIC) 866852.478
## Sample-size adjusted Bayesian (SABIC) 866782.561
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.161
## 90 Percent confidence interval - lower 0.159
## 90 Percent confidence interval - upper 0.163
## P-value H_0: RMSEA <= 0.050 0.000
## P-value H_0: RMSEA >= 0.080 1.000
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.141
##
## Parameter Estimates:
##
## Standard errors Standard
## Information First.order
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|)
## regi =~
## manualisklima 0.618 0.008 81.359 0.000
## automatasebvlt -0.627 0.009 -68.775 0.000
## kor 0.439 0.006 71.366 0.000
## ut -0.056 0.008 -6.851 0.000
##
## Regressions:
## Estimate Std.Err z-value P(>|z|)
## ut ~
## kor 0.550 0.006 93.936 0.000
## dizel 0.253 0.005 47.856 0.000
## dizel ~
## tomeg 0.304 0.005 61.800 0.000
## teljesitmeny ~
## regi -0.641 0.009 -70.592 0.000
## price ~
## teljesitmeny 0.260 0.003 75.390 0.000
## ut -0.094 0.002 -50.169 0.000
## kor -0.616 0.002 -247.511 0.000
## tomeg 0.044 0.003 16.770 0.000
## automatasebvlt 0.062 0.003 18.828 0.000
## regi -0.187 0.007 -26.945 0.000
##
## Covariances:
## Estimate Std.Err z-value P(>|z|)
## .automatasebvalto ~~
## .teljesitmeny 0.180 0.008 23.091 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .manualisklima 0.618 0.011 58.693 0.000
## .automatasebvlt 0.606 0.010 60.351 0.000
## .kor 0.807 0.007 122.658 0.000
## .ut 0.656 0.001 537.332 0.000
## .dizel 0.908 0.023 39.676 0.000
## .teljesitmeny 0.589 0.009 68.067 0.000
## .price 0.105 0.001 117.250 0.000
## regi 1.000
# Path diagram of sem9; `coefs` is the documented argument name (the
# abbreviated `coef` depended on partial argument matching).
lavaanPlot(sem9, coefs = TRUE, sig = 0.05)
# Model 10: the latent variable is renamed `regitech`. Power replaces mileage
# as an indicator, taking the place of the separate teljesitmeny~regi
# regression used in model 9.
mod10 <- " ut~kor+dizel
regitech=~manualisklima+automatasebvalto+teljesitmeny+kor
dizel~tomeg
teljesitmeny~~automatasebvalto
price~teljesitmeny+ut+kor+tomeg+automatasebvalto+regitech
"
# std.lv = TRUE fixes the latent variance to 1, so all loadings are estimated.
sem10 <- sem(mod10, data = car_st, estimator = "MLF", std.lv = TRUE)
# `fit.measures` spelled out instead of the partially matched `fit`.
summary(sem10, fit.measures = TRUE) #CFI:0.910; TLI:0.820; RMSEA:0.155
## lavaan 0.6-19 ended normally after 29 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 21
##
## Number of observations 52698
##
## Model Test User Model:
##
## Test statistic 17792.294
## Degrees of freedom 14
## P-value (Chi-square) 0.000
##
## Model Test Baseline Model:
##
## Test statistic 197919.598
## Degrees of freedom 28
## P-value 0.000
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 0.910
## Tucker-Lewis Index (TLI) 0.820
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -433359.408
## Loglikelihood unrestricted model (H1) -424463.260
##
## Akaike (AIC) 866760.815
## Bayesian (BIC) 866947.134
## Sample-size adjusted Bayesian (SABIC) 866880.396
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.155
## 90 Percent confidence interval - lower 0.153
## 90 Percent confidence interval - upper 0.157
## P-value H_0: RMSEA <= 0.050 0.000
## P-value H_0: RMSEA >= 0.080 1.000
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.142
##
## Parameter Estimates:
##
## Standard errors Standard
## Information First.order
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|)
## regitech =~
## manualisklima 0.611 0.008 80.359 0.000
## automatasebvlt -0.635 0.009 -68.933 0.000
## teljesitmeny -0.647 0.009 -71.286 0.000
## kor 0.438 0.006 71.061 0.000
##
## Regressions:
## Estimate Std.Err z-value P(>|z|)
## ut ~
## kor 0.526 0.005 107.346 0.000
## dizel 0.263 0.005 52.113 0.000
## dizel ~
## tomeg 0.304 0.005 61.973 0.000
## price ~
## teljesitmeny 0.257 0.004 71.253 0.000
## ut -0.089 0.002 -52.506 0.000
## kor -0.618 0.002 -269.379 0.000
## tomeg 0.044 0.003 16.586 0.000
## automatasebvlt 0.059 0.003 16.793 0.000
## regitech -0.193 0.007 -26.775 0.000
##
## Covariances:
## Estimate Std.Err z-value P(>|z|)
## .automatasebvalto ~~
## .teljesitmeny 0.172 0.008 21.860 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .manualisklima 0.626 0.011 59.311 0.000
## .automatasebvlt 0.597 0.010 59.467 0.000
## .teljesitmeny 0.582 0.009 66.309 0.000
## .kor 0.808 0.007 122.765 0.000
## .ut 0.658 0.001 562.385 0.000
## .dizel 0.908 0.023 39.652 0.000
## .price 0.104 0.001 109.987 0.000
## regitech 1.000
# Path diagram of sem10; `coefs` is the documented argument name (the
# abbreviated `coef` depended on partial argument matching).
lavaanPlot(sem10, coefs = TRUE, sig = 0.05)
# Ten largest modification indices of sem10. The lavaan argument is `sort.`
# (with a trailing dot); `sort` only matched it partially.
modindices(sem10, sort. = TRUE, maximum.number = 10)
## lhs op rhs mi epc sepc.lv sepc.all sepc.nox
## 68 tomeg ~ regitech 11248.796 -0.570 -0.570 -0.570 -0.570
## 85 regitech ~ tomeg 11248.796 -0.570 -0.570 -0.570 -0.570
## 66 tomeg ~ teljesitmeny 11172.980 0.460 0.460 0.460 0.460
## 67 tomeg ~ automatasebvalto 7135.442 0.368 0.368 0.368 0.368
## 64 tomeg ~ price 5902.567 0.359 0.359 0.357 0.357
## 73 teljesitmeny ~ tomeg 3757.263 0.210 0.210 0.210 0.210
## 82 regitech ~ dizel 2680.470 -0.272 -0.272 -0.272 -0.272
## 28 manualisklima ~~ dizel 1410.271 -0.137 -0.137 -0.181 -0.181
## 83 regitech ~ price 1392.265 -2.693 -2.693 -2.676 -2.676
## 65 tomeg ~ kor 1245.664 -0.154 -0.154 -0.154 -0.154
# Model 11: model 10 plus tomeg~regitech+teljesitmeny, the top suggestions of
# the sem10 modification indices. Three paths are labelled for the effect
# decomposition that follows:
#   a = age -> mileage, b = mileage -> price, c = age -> price (direct).
mod11 <- " ut~a*kor+dizel
regitech=~manualisklima+automatasebvalto+teljesitmeny+kor
dizel~tomeg
tomeg~regitech+teljesitmeny
teljesitmeny~~automatasebvalto
price~teljesitmeny+b*ut+c*kor+tomeg+automatasebvalto+regitech
"
# std.lv = TRUE fixes the latent variance to 1, so all loadings are estimated.
sem11 <- sem(mod11, data = car_st, estimator = "MLF", std.lv = TRUE)
# `fit.measures` spelled out instead of the partially matched `fit`.
summary(sem11, fit.measures = TRUE) #CFI:0.982; TLI:0.958; RMSEA:0.075
## lavaan 0.6-19 ended normally after 30 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 24
##
## Number of observations 52698
##
## Model Test User Model:
##
## Test statistic 3542.273
## Degrees of freedom 12
## P-value (Chi-square) 0.000
##
## Model Test Baseline Model:
##
## Test statistic 197919.598
## Degrees of freedom 28
## P-value 0.000
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 0.982
## Tucker-Lewis Index (TLI) 0.958
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -501009.120
## Loglikelihood unrestricted model (H1) -499237.983
##
## Akaike (AIC) 1002066.240
## Bayesian (BIC) 1002279.176
## Sample-size adjusted Bayesian (SABIC) 1002202.903
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.075
## 90 Percent confidence interval - lower 0.073
## 90 Percent confidence interval - upper 0.077
## P-value H_0: RMSEA <= 0.050 0.000
## P-value H_0: RMSEA >= 0.080 0.000
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.040
##
## Parameter Estimates:
##
## Standard errors Standard
## Information First.order
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|)
## regitech =~
## manualisklima 0.611 0.008 81.378 0.000
## automatasebvlt -0.647 0.009 -68.498 0.000
## teljesitmeny -0.654 0.009 -69.066 0.000
## kor 0.422 0.006 69.444 0.000
##
## Regressions:
## Estimate Std.Err z-value P(>|z|)
## ut ~
## kor (a) 0.526 0.005 107.033 0.000
## dizel 0.263 0.005 51.986 0.000
## dizel ~
## tomeg 0.304 0.019 15.891 0.000
## tomeg ~
## regitech -0.327 0.017 -19.781 0.000
## teljestmny 0.246 0.011 22.395 0.000
## price ~
## teljestmny 0.263 0.004 71.155 0.000
## ut (b) -0.089 0.002 -52.169 0.000
## kor (c) -0.620 0.002 -280.402 0.000
## tomeg 0.014 0.004 3.898 0.000
## autmtsbvlt 0.055 0.004 15.130 0.000
## regitech -0.206 0.008 -25.877 0.000
##
## Covariances:
## Estimate Std.Err z-value P(>|z|)
## .automatasebvalto ~~
## .teljesitmeny 0.159 0.008 19.809 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .manualisklima 0.626 0.010 60.617 0.000
## .automatasebvlt 0.581 0.010 59.219 0.000
## .teljesitmeny 0.572 0.009 61.661 0.000
## .kor 0.822 0.007 123.822 0.000
## .ut 0.658 0.001 558.744 0.000
## .dizel 0.908 0.053 17.189 0.000
## .tomeg 0.727 0.006 117.779 0.000
## .price 0.104 0.001 100.954 0.000
## regitech 1.000
# Path diagram of sem11; `coefs` is the documented argument name (the
# abbreviated `coef` depended on partial argument matching).
lavaanPlot(sem11, coefs = TRUE, sig = 0.05)
# Effect of age on price, split into a direct path (c) and an indirect path
# through mileage (a * b).
# Age is counted from the start of the car's use (registration?), so it is not
# a purely exogenous given and may itself depend on several things.
#
# sem11 was fitted on car_st, i.e. on scale()d columns, so a, b and c are
# slopes in standard-deviation units (SD of log price per SD of age). They
# have to be converted back to "log price per year" before exp() can be read
# as a multiplicative price change. The earlier values exp(c) = 0.538,
# exp(a * b) = 0.954 and exp(c + a * b) = 0.513 were computed on the
# standardized scale and are NOT 46 % / 4.5 % / 49 % price reductions.
# Re-run this chunk to obtain the corrected figures.
est <- coef(sem11)
# `car` holds the unstandardized columns that scale() was applied to.
sd_per_year <- sd(car$price) / sd(car$kor)
kor_direct <- est[["c"]]
kor_indirect <- est[["a"]] * est[["b"]]

# Multiplicative change in expected price per additional year of age
# (1 - value = proportional decrease).
exp(kor_direct * sd_per_year) # direct effect of age
exp(kor_indirect * sd_per_year) # indirect effect through mileage
exp((kor_direct + kor_indirect) * sd_per_year) # direct + mileage-mediated effect
# NOTE(review): age is also an indicator of `regitech`, which predicts price;
# that route is not included in the sum above - confirm whether it should be.