thr3ads.net - R help es - [R-es] Consulta [Sep 2019]

If this information is useful, please help other people find it:
Share via:

BERENICE DOMINGUEZ SANCHEZ

2019-Sep-23 23:49 UTC

[R-es] Consulta

Buenas tardes a todo@s:

Tenía la versión 3.6 de R y utilizaba la paquetería pdftools para extraer
información de archivos en PDF. Actualicé a la versión 3.6.1 y ya no reconoce la
paquetería. ¿Alguien me puede ayudar? Prácticamente no reconoce las funciones
de pdftools.

library(pdftools)
library(stringr)?
library(NLP)?
library(tm)?
library(tesseract)?
library(magick)?
install.packages("magick")?
install.packages("pdftools")?
?
txt <- system.file("texts", "txt", package =
"tm")?
?
rfc_rg <- "([A-Z]{3,})([0-9]{6})([A-Z]|[0-9]){0,3}"?
#poliza_rg <-
"(34|36|37|39)(ME|MEC|CH|MB|TF|GI|VE|TS|IM|ER|VE)*([0-9]{6,})[-]([0-9]){2}[-][A-Z]"?
poliza_rg <-
"(ME|CH|MB|TF|GI|gi|VE|TS|IM|ER|VE)*([0-9]{8,})[-]([0-9]){2}"?
registro_rg <- "(CNSF-H0711-)([0-9]{4})[- ]([0-9]){4}"?
subgrupo_rg <- "_([0-9]){1,3}."?
mon_rg <- "SMGM|UMAM|MN"?
?
?
ruta <- 'C:/Users/bdominguez/Documents/H0711/Bond/1909/'?
archivos<-list.files(path=ruta,pattern = '*.pdf')?
?
?
imagen <-
image_read_pdf(path=paste(ruta,"/",nombre,".pdf",sep=""))?
prueba <-image_ocr(imagen, language = 'eng')?
lineas<-unlist(str_split(prueba,pattern = "\n"))?
lineasp<-unlist(str_split(prueba[2],pattern = "\r\n"))?
?
newnom <- NULL?
renglones <- NULL?
for (nombre in archivos){?
  subgrupo <- str_extract(str_extract(nombre,pattern = subgrupo_rg),pattern =
"[0-9]{1,3}")?
  imagen <-
image_read_pdf(path=paste(ruta,"/",nombre,".pdf",sep=""))?
  prueba <-image_ocr(imagen, language = 'eng')?
  lineas<-unlist(str_split(prueba,pattern = "\n"))?
  poliza <- NULL?
  poliza<-str_extract(lineas[1],poliza_rg)?
  newnom <-
c(newnom,paste(poliza[1],substr(nombre,5,6),".pdf",sep=''))?
  ?
  registro <- NULL?
  registro<-str_extract(lineas[49],registro_rg)?
  ?
  rfc <- NULL?
  rfc <- str_extract(lineas[5],rfc_rg)?
  ?
  ?
  #lineasnew<-unlist(str_split(lineas[2],pattern = "\r\n"))?
  #lineasdosnew<-unlist(str_split(lineas[1],pattern = "\r\n"))?
  ?
  cobertura <- NA?
  extranjera <- NA?
  suma_str   <- NA?
  deducible_str <- NA?
  ?
  suma <- NA?
  coaseguro <- NA?
  deducible <- NA?
  tope <- NA?
  mon <- NA?
  mondedu <- NA?
  ?
  cobertura  <- grep("Cobertura en el
Extranjero",lineas,value=TRUE)?
  extranjera <- grep("Emergencia en el
Extranjero",lineas,value=TRUE)?
  suma_str   <- grep("SUMA ASEGURADA:",lineas,value=TRUE)?
  deducible_str   <- grep("DEDUCIBLE:",lineas,value=TRUE)?
  sumacob <- NA?
  sumaext <- NA?
  ?
  pprimaria <- grep("Numero de Póliza:", lineas, value = TRUE)?
  dnprimariaa <- grep("Nombre de la Aseguradora Primaria:", lineas,
value = TRUE)?
  ?
  #cer<- grep("Certificado No. ",lineas, value=TRUE)?
  #ntit<- grep("Ramo", lineas, value=TRUE)?
  ?
  sumacob<-as.numeric(str_extract(cobertura[1],pattern =
"[0-9]{1,}"))?
  if (length(sumacob)==0){?
    sumacob = NA?
  }?
  ?
  sumaext<-as.numeric(str_extract(extranjera[17],pattern =
"[0-9]{1,}"))?
  if (length(sumaext)==0){?
    sumaext = NA?
  }?
  valores <- NULL?
  monedas <- NULL?
  valores <- str_extract_all(suma_str[17],pattern =
"[0-9]{0,3},*[0-9]{0,3},*[0-9]{1,3}(.[0-9]{1,}){0,1}",simplify=TRUE)?
  monedas <- str_extract(suma_str,pattern = mon_rg)?
  if (length(valores[1])==0){?
    suma = NA?
    mon = NA?
  }else{?
    suma = as.numeric(gsub(pattern = ",*",replacement =
"",valores[1]))?
    mon <- as.character(monedas[1])?
  }?
  ?
  if (length(valores[2])==0){?
    coaseguro = NA?
  }else{?
    coaseguro = as.numeric(valores[2])?
  }?
  valores <- NULL?
  valores <- str_extract_all(deducible_str[1],pattern =
"[0-9]{0,3},*[0-9]{0,3},*[0-9]{1,3}(.[0-9]{1,}){0,1}",simplify=TRUE)?
  ?
  if (length(valores[1])==0){?
    deducible <- NA?
  }else{?
    deducible <- as.numeric(gsub(pattern = ",",replacement =
"",valores[1]))?
  }?
  ?
  monedas <- NULL  ?
  monedas <- str_extract(deducible_str[1],pattern = mon_rg)?
  ?
  if (length(monedas)==0){?
    mondedu <- NA?
  }else{?
    mondedu <- monedas?
  }?
  ?
  ?
  if (length(valores[2])==0){?
    tope = NA?
  }else{?
    tope = as.numeric(gsub(pattern = ",",replacement =
"",valores[2]))?
  }?
  ?
  renglon <-
data.frame(archivo=nombre,poliza=as.character(poliza[1]),cobertura=sumacob,emergencia=sumaext,registro=registro[1],suma=suma,coaseguro=coaseguro,deducible=deducible,tope=tope,rfc=rfc,mon=mon,mondedu=mondedu,subgrupo=subgrupo,
cert=as.character(cer[1]), cer_tit=as.character(lineasdos[14]),
titu=as.character(lineasdos[10]))?
  renglones <- rbind(renglones,renglon)?
}?
?
# Con los datos del data frame renombra los archivos hay que crear los
subdirectorios?
?
noms <- data.frame(archivo=archivos,poliza=newnom)?
?
noms <-
renglones[!is.na(renglones$poliza),c('archivo','cer_tit')]?
ungrupo<-sqldf("select poliza,count(cert) from noms group by 1  having
count(cert) <= 1 ")?
noms<-sqldf("select * from noms where poliza in (select poliza from
ungrupo)")?
length(noms$archivo)?
salida <- "/renombra/"?
?
for (i in 1:length(noms[,1])){?
  if (!is.na(noms[i,'cer_tit'])){?
    pfrom <-
paste(ruta,"/",noms[i,'archivo'],sep='')?
    pto <-
paste(ruta,salida,noms[i,'cer_tit'],'.pdf',sep='')?
    if (!file.exists(pto)){?
      file.copy(from = pfrom,to=pto)?
    }?
  }?
}?
?
#Ahora las polizas con subgrupos?
noms <-
renglones[!is.na(renglones$poliza),c('archivo','poliza','subgrupo')]?
ungrupo<-sqldf("select poliza,count(subgrupo) from noms group by 1 
having count(subgrupo) > 1 ")?
noms<-sqldf("select * from noms where poliza in (select poliza from
ungrupo) order by poliza")?
length(noms$archivo)?
salida <- "/Renombra/ConGrupos/"?
?
for (i in 1:length(noms[,1])){?
  if (!is.na(noms[i,'poliza'])){?
    pfrom <-
paste(ruta,"/",noms[i,'archivo'],sep='')?
    pto <-
paste(ruta,salida,noms[i,'poliza'],'_',noms[i,'subgrupo'],'.pdf',sep='')?
    if (!file.exists(pto)){?
      file.copy(from = pfrom,to=pto)?
    }?
  }?
}?
?
salida <- "/Renombra/Grupos/"?
dirActual <- getwd()?
polizas_con_grupos <- as.character(sqldf("select poliza from
ungrupo")$poliza)?
setwd(ruta)?
for (policita in polizas_con_grupos){?
  archivos <- as.character(sqldf(paste("select archivo from renglones
where poliza like
'%",policita,"%'",sep=''))$archivo)?
  staple_pdf(input_files = archivos,output_filepath =
paste(ruta,salida,policita,".pdf",sep='')) ?
}?
setwd(dirActual)?
?
#Ahora los PDf sin poliza?
noms <-
renglones[is.na(renglones$poliza),c('archivo','poliza')]?
salida <- "/renombra/SinPoliza/"?
for (i in 1:length(noms[,1])){?
  pfrom <-
paste(ruta,"/",noms[i,'archivo'],sep='')?
  pto <- paste(ruta,salida,noms[i,'archivo'],sep='')?
  if (!file.exists(pto)){?
    file.copy(from = pfrom,to=pto)?
  }?
}?
?
View(renglones)?
write.csv(renglones,
file='//192.168.1.83/share/CERT/CERTIFICADOS_INDIVIDUALES_295_UMAM/Renombra/CERTIFICADOS_INDIVIDUALES_295_UMAM.csv')?
?
?
?


	[[alternative HTML version deleted]]

Emilio L. Cano

2019-Sep-24 04:56 UTC

head link

[R-es] Consulta

Hola Berenice,

¿Qué quieres decir con que no reconoce el paquete? ¿Te da algún mensaje de error?
No sé si has probado a instalar de nuevo el paquete, si no hazlo.

Para poder reproducir el error con tu código haría falta alguno de los pdfs que
utilizas (puedes compartir un enlace a dropbox o similar).

Un saludo,
Emilio
> El 24 sept 2019, a las 1:49, BERENICE DOMINGUEZ SANCHEZ <ds_bere en
hotmail.com> escribió:
> 
> Buenas tarde a todo en s:
> 
> Tenia la versión de R 3.6 y utilizaba la paquetería de pdftools para
extraer información de archivos en pdf actualice la versión 3.6.1 y ya no
reconoce la paquetería alguien que me pueda ayudar. Prácticamente no reconoce
las funciones de pdftools
> 
> library(pdftools)
> library(stringr)?
> library(NLP)?
> library(tm)?
> library(tesseract)?
> library(magick)?
> install.packages("magick")?
> install.packages("pdftools")?
> ?
> txt <- system.file("texts", "txt", package =
"tm")?
> ?
> rfc_rg <- "([A-Z]{3,})([0-9]{6})([A-Z]|[0-9]){0,3}"?
> #poliza_rg <-
"(34|36|37|39)(ME|MEC|CH|MB|TF|GI|VE|TS|IM|ER|VE)*([0-9]{6,})[-]([0-9]){2}[-][A-Z]"?
> poliza_rg <-
"(ME|CH|MB|TF|GI|gi|VE|TS|IM|ER|VE)*([0-9]{8,})[-]([0-9]){2}"?
> registro_rg <- "(CNSF-H0711-)([0-9]{4})[- ]([0-9]){4}"?
> subgrupo_rg <- "_([0-9]){1,3}."?
> mon_rg <- "SMGM|UMAM|MN"?
> ?
> ?
> ruta <- 'C:/Users/bdominguez/Documents/H0711/Bond/1909/'?
> archivos<-list.files(path=ruta,pattern = '*.pdf')?
> ?
> ?
> imagen <-
image_read_pdf(path=paste(ruta,"/",nombre,".pdf",sep=""))?
> prueba <-image_ocr(imagen, language = 'eng')?
> lineas<-unlist(str_split(prueba,pattern = "\n"))?
> lineasp<-unlist(str_split(prueba[2],pattern = "\r\n"))?
> ?
> newnom <- NULL?
> renglones <- NULL?
> for (nombre in archivos){?
>  subgrupo <- str_extract(str_extract(nombre,pattern =
subgrupo_rg),pattern = "[0-9]{1,3}")?
>  imagen <-
image_read_pdf(path=paste(ruta,"/",nombre,".pdf",sep=""))?
>  prueba <-image_ocr(imagen, language = 'eng')?
>  lineas<-unlist(str_split(prueba,pattern = "\n"))?
>  poliza <- NULL?
>  poliza<-str_extract(lineas[1],poliza_rg)?
>  newnom <-
c(newnom,paste(poliza[1],substr(nombre,5,6),".pdf",sep=''))?
>  ?
>  registro <- NULL?
>  registro<-str_extract(lineas[49],registro_rg)?
>  ?
>  rfc <- NULL?
>  rfc <- str_extract(lineas[5],rfc_rg)?
>  ?
>  ?
>  #lineasnew<-unlist(str_split(lineas[2],pattern = "\r\n"))?
>  #lineasdosnew<-unlist(str_split(lineas[1],pattern = "\r\n"))?
>  ?
>  cobertura <- NA?
>  extranjera <- NA?
>  suma_str   <- NA?
>  deducible_str <- NA?
>  ?
>  suma <- NA?
>  coaseguro <- NA?
>  deducible <- NA?
>  tope <- NA?
>  mon <- NA?
>  mondedu <- NA?
>  ?
>  cobertura  <- grep("Cobertura en el
Extranjero",lineas,value=TRUE)?
>  extranjera <- grep("Emergencia en el
Extranjero",lineas,value=TRUE)?
>  suma_str   <- grep("SUMA ASEGURADA:",lineas,value=TRUE)?
>  deducible_str   <- grep("DEDUCIBLE:",lineas,value=TRUE)?
>  sumacob <- NA?
>  sumaext <- NA?
>  ?
>  pprimaria <- grep("Numero de Póliza:", lineas, value = TRUE)?
>  dnprimariaa <- grep("Nombre de la Aseguradora Primaria:",
lineas, value = TRUE)?
>  ?
>  #cer<- grep("Certificado No. ",lineas, value=TRUE)?
>  #ntit<- grep("Ramo", lineas, value=TRUE)?
>  ?
>  sumacob<-as.numeric(str_extract(cobertura[1],pattern =
"[0-9]{1,}"))?
>  if (length(sumacob)==0){?
>    sumacob = NA?
>  }?
>  ?
>  sumaext<-as.numeric(str_extract(extranjera[17],pattern =
"[0-9]{1,}"))?
>  if (length(sumaext)==0){?
>    sumaext = NA?
>  }?
>  valores <- NULL?
>  monedas <- NULL?
>  valores <- str_extract_all(suma_str[17],pattern =
"[0-9]{0,3},*[0-9]{0,3},*[0-9]{1,3}(.[0-9]{1,}){0,1}",simplify=TRUE)?
>  monedas <- str_extract(suma_str,pattern = mon_rg)?
>  if (length(valores[1])==0){?
>    suma = NA?
>    mon = NA?
>  }else{?
>    suma = as.numeric(gsub(pattern = ",*",replacement =
"",valores[1]))?
>    mon <- as.character(monedas[1])?
>  }?
>  ?
>  if (length(valores[2])==0){?
>    coaseguro = NA?
>  }else{?
>    coaseguro = as.numeric(valores[2])?
>  }?
>  valores <- NULL?
>  valores <- str_extract_all(deducible_str[1],pattern =
"[0-9]{0,3},*[0-9]{0,3},*[0-9]{1,3}(.[0-9]{1,}){0,1}",simplify=TRUE)?
>  ?
>  if (length(valores[1])==0){?
>    deducible <- NA?
>  }else{?
>    deducible <- as.numeric(gsub(pattern = ",",replacement =
"",valores[1]))?
>  }?
>  ?
>  monedas <- NULL  ?
>  monedas <- str_extract(deducible_str[1],pattern = mon_rg)?
>  ?
>  if (length(monedas)==0){?
>    mondedu <- NA?
>  }else{?
>    mondedu <- monedas?
>  }?
>  ?
>  ?
>  if (length(valores[2])==0){?
>    tope = NA?
>  }else{?
>    tope = as.numeric(gsub(pattern = ",",replacement =
"",valores[2]))?
>  }?
>  ?
>  renglon <-
data.frame(archivo=nombre,poliza=as.character(poliza[1]),cobertura=sumacob,emergencia=sumaext,registro=registro[1],suma=suma,coaseguro=coaseguro,deducible=deducible,tope=tope,rfc=rfc,mon=mon,mondedu=mondedu,subgrupo=subgrupo,
cert=as.character(cer[1]), cer_tit=as.character(lineasdos[14]),
titu=as.character(lineasdos[10]))?
>  renglones <- rbind(renglones,renglon)?
> }?
> ?
> # Con los datos del data frame renombra los archivos hay que crear los
subdirectorios?
> ?
> noms <- data.frame(archivo=archivos,poliza=newnom)?
> ?
> noms <-
renglones[!is.na(renglones$poliza),c('archivo','cer_tit')]?
> ungrupo<-sqldf("select poliza,count(cert) from noms group by 1 
having count(cert) <= 1 ")?
> noms<-sqldf("select * from noms where poliza in (select poliza from
ungrupo)")?
> length(noms$archivo)?
> salida <- "/renombra/"?
> ?
> for (i in 1:length(noms[,1])){?
>  if (!is.na(noms[i,'cer_tit'])){?
>    pfrom <-
paste(ruta,"/",noms[i,'archivo'],sep='')?
>    pto <-
paste(ruta,salida,noms[i,'cer_tit'],'.pdf',sep='')?
>    if (!file.exists(pto)){?
>      file.copy(from = pfrom,to=pto)?
>    }?
>  }?
> }?
> ?
> #Ahora las polizas con subgrupos?
> noms <-
renglones[!is.na(renglones$poliza),c('archivo','poliza','subgrupo')]?
> ungrupo<-sqldf("select poliza,count(subgrupo) from noms group by 1 
having count(subgrupo) > 1 ")?
> noms<-sqldf("select * from noms where poliza in (select poliza from
ungrupo) order by poliza")?
> length(noms$archivo)?
> salida <- "/Renombra/ConGrupos/"?
> ?
> for (i in 1:length(noms[,1])){?
>  if (!is.na(noms[i,'poliza'])){?
>    pfrom <-
paste(ruta,"/",noms[i,'archivo'],sep='')?
>    pto <-
paste(ruta,salida,noms[i,'poliza'],'_',noms[i,'subgrupo'],'.pdf',sep='')?
>    if (!file.exists(pto)){?
>      file.copy(from = pfrom,to=pto)?
>    }?
>  }?
> }?
> ?
> salida <- "/Renombra/Grupos/"?
> dirActual <- getwd()?
> polizas_con_grupos <- as.character(sqldf("select poliza from
ungrupo")$poliza)?
> setwd(ruta)?
> for (policita in polizas_con_grupos){?
>  archivos <- as.character(sqldf(paste("select archivo from
renglones where poliza like
'%",policita,"%'",sep=''))$archivo)?
>  staple_pdf(input_files = archivos,output_filepath =
paste(ruta,salida,policita,".pdf",sep='')) ?
> }?
> setwd(dirActual)?
> ?
> #Ahora los PDf sin poliza?
> noms <-
renglones[is.na(renglones$poliza),c('archivo','poliza')]?
> salida <- "/renombra/SinPoliza/"?
> for (i in 1:length(noms[,1])){?
>  pfrom <-
paste(ruta,"/",noms[i,'archivo'],sep='')?
>  pto <- paste(ruta,salida,noms[i,'archivo'],sep='')?
>  if (!file.exists(pto)){?
>    file.copy(from = pfrom,to=pto)?
>  }?
> }?
> ?
> View(renglones)?
> write.csv(renglones,
file='//192.168.1.83/share/CERT/CERTIFICADOS_INDIVIDUALES_295_UMAM/Renombra/CERTIFICADOS_INDIVIDUALES_295_UMAM.csv')?
> ?
> ?
> ?
> 
> 
> 	[[alternative HTML version deleted]]
> 
> _______________________________________________
> R-help-es mailing list
> R-help-es en r-project.org
> https://stat.ethz.ch/mailman/listinfo/r-help-es

Carlos Ortega

2019-Sep-24 07:09 UTC

head link

[R-es] Consulta

Hola,

Una cosa que puedes hacer temporalmente es desinstalar la versión 3.6.1 y
volver a  la anterior 3.6.0.
Los paquetes tardan un tiempo, suele ser poco, en adaptarse a las nuevas
versiones.

Pero vaya, en cualquier caso, la 3.6.1 ya lleva un tiempo liberada y esta
adaptación ya debería haberse producido.
Si nos das más detalles del error que tienes y en qué paso de tu script
aparece el error, podríamos ayudarte más.

Gracias,
Carlos Ortega
www.qualityexcellence.es

El mar., 24 sept. 2019 a las 1:49, BERENICE DOMINGUEZ SANCHEZ (<
ds_bere en hotmail.com>) escribió:
> Buenas tarde a todo en s:
>
> Tenia la versión de R 3.6 y utilizaba la paquetería de pdftools para
> extraer información de archivos en pdf actualice la versión 3.6.1 y ya no
> reconoce la paquetería alguien que me pueda ayudar. Prácticamente no
> reconoce las funciones de pdftools
>
> library(pdftools)
> library(stringr)?
> library(NLP)?
> library(tm)?
> library(tesseract)?
> library(magick)?
> install.packages("magick")?
> install.packages("pdftools")?
> ?
> txt <- system.file("texts", "txt", package =
"tm")?
> ?
> rfc_rg <- "([A-Z]{3,})([0-9]{6})([A-Z]|[0-9]){0,3}"?
> #poliza_rg <-
>
"(34|36|37|39)(ME|MEC|CH|MB|TF|GI|VE|TS|IM|ER|VE)*([0-9]{6,})[-]([0-9]){2}[-][A-Z]"?
> poliza_rg <-
"(ME|CH|MB|TF|GI|gi|VE|TS|IM|ER|VE)*([0-9]{8,})[-]([0-9]){2}"?
> registro_rg <- "(CNSF-H0711-)([0-9]{4})[- ]([0-9]){4}"?
> subgrupo_rg <- "_([0-9]){1,3}."?
> mon_rg <- "SMGM|UMAM|MN"?
> ?
> ?
> ruta <- 'C:/Users/bdominguez/Documents/H0711/Bond/1909/'?
> archivos<-list.files(path=ruta,pattern = '*.pdf')?
> ?
> ?
> imagen <-
image_read_pdf(path=paste(ruta,"/",nombre,".pdf",sep=""))?
> prueba <-image_ocr(imagen, language = 'eng')?
> lineas<-unlist(str_split(prueba,pattern = "\n"))?
> lineasp<-unlist(str_split(prueba[2],pattern = "\r\n"))?
> ?
> newnom <- NULL?
> renglones <- NULL?
> for (nombre in archivos){?
>   subgrupo <- str_extract(str_extract(nombre,pattern >
subgrupo_rg),pattern = "[0-9]{1,3}")?
>   imagen <-
image_read_pdf(path=paste(ruta,"/",nombre,".pdf",sep=""))?
>   prueba <-image_ocr(imagen, language = 'eng')?
>   lineas<-unlist(str_split(prueba,pattern = "\n"))?
>   poliza <- NULL?
>   poliza<-str_extract(lineas[1],poliza_rg)?
>   newnom <-
c(newnom,paste(poliza[1],substr(nombre,5,6),".pdf",sep=''))?
>   ?
>   registro <- NULL?
>   registro<-str_extract(lineas[49],registro_rg)?
>   ?
>   rfc <- NULL?
>   rfc <- str_extract(lineas[5],rfc_rg)?
>   ?
>   ?
>   #lineasnew<-unlist(str_split(lineas[2],pattern = "\r\n"))?
>   #lineasdosnew<-unlist(str_split(lineas[1],pattern =
"\r\n"))?
>   ?
>   cobertura <- NA?
>   extranjera <- NA?
>   suma_str   <- NA?
>   deducible_str <- NA?
>   ?
>   suma <- NA?
>   coaseguro <- NA?
>   deducible <- NA?
>   tope <- NA?
>   mon <- NA?
>   mondedu <- NA?
>   ?
>   cobertura  <- grep("Cobertura en el
Extranjero",lineas,value=TRUE)?
>   extranjera <- grep("Emergencia en el
Extranjero",lineas,value=TRUE)?
>   suma_str   <- grep("SUMA ASEGURADA:",lineas,value=TRUE)?
>   deducible_str   <- grep("DEDUCIBLE:",lineas,value=TRUE)?
>   sumacob <- NA?
>   sumaext <- NA?
>   ?
>   pprimaria <- grep("Numero de Póliza:", lineas, value =
TRUE)?
>   dnprimariaa <- grep("Nombre de la Aseguradora Primaria:",
lineas, value
> = TRUE)?
>   ?
>   #cer<- grep("Certificado No. ",lineas, value=TRUE)?
>   #ntit<- grep("Ramo", lineas, value=TRUE)?
>   ?
>   sumacob<-as.numeric(str_extract(cobertura[1],pattern =
"[0-9]{1,}"))?
>   if (length(sumacob)==0){?
>     sumacob = NA?
>   }?
>   ?
>   sumaext<-as.numeric(str_extract(extranjera[17],pattern =
"[0-9]{1,}"))?
>   if (length(sumaext)==0){?
>     sumaext = NA?
>   }?
>   valores <- NULL?
>   monedas <- NULL?
>   valores <- str_extract_all(suma_str[17],pattern >
"[0-9]{0,3},*[0-9]{0,3},*[0-9]{1,3}(.[0-9]{1,}){0,1}",simplify=TRUE)?
>   monedas <- str_extract(suma_str,pattern = mon_rg)?
>   if (length(valores[1])==0){?
>     suma = NA?
>     mon = NA?
>   }else{?
>     suma = as.numeric(gsub(pattern = ",*",replacement =
"",valores[1]))?
>     mon <- as.character(monedas[1])?
>   }?
>   ?
>   if (length(valores[2])==0){?
>     coaseguro = NA?
>   }else{?
>     coaseguro = as.numeric(valores[2])?
>   }?
>   valores <- NULL?
>   valores <- str_extract_all(deducible_str[1],pattern >
"[0-9]{0,3},*[0-9]{0,3},*[0-9]{1,3}(.[0-9]{1,}){0,1}",simplify=TRUE)?
>   ?
>   if (length(valores[1])==0){?
>     deducible <- NA?
>   }else{?
>     deducible <- as.numeric(gsub(pattern = ",",replacement
> "",valores[1]))?
>   }?
>   ?
>   monedas <- NULL  ?
>   monedas <- str_extract(deducible_str[1],pattern = mon_rg)?
>   ?
>   if (length(monedas)==0){?
>     mondedu <- NA?
>   }else{?
>     mondedu <- monedas?
>   }?
>   ?
>   ?
>   if (length(valores[2])==0){?
>     tope = NA?
>   }else{?
>     tope = as.numeric(gsub(pattern = ",",replacement =
"",valores[2]))?
>   }?
>   ?
>   renglon <-
>
data.frame(archivo=nombre,poliza=as.character(poliza[1]),cobertura=sumacob,emergencia=sumaext,registro=registro[1],suma=suma,coaseguro=coaseguro,deducible=deducible,tope=tope,rfc=rfc,mon=mon,mondedu=mondedu,subgrupo=subgrupo,
> cert=as.character(cer[1]), cer_tit=as.character(lineasdos[14]),
> titu=as.character(lineasdos[10]))?
>   renglones <- rbind(renglones,renglon)?
> }?
> ?
> # Con los datos del data frame renombra los archivos hay que crear los
> subdirectorios?
> ?
> noms <- data.frame(archivo=archivos,poliza=newnom)?
> ?
> noms <-
renglones[!is.na(renglones$poliza),c('archivo','cer_tit')]?
> ungrupo<-sqldf("select poliza,count(cert) from noms group by 1 
having
> count(cert) <= 1 ")?
> noms<-sqldf("select * from noms where poliza in (select poliza from
> ungrupo)")?
> length(noms$archivo)?
> salida <- "/renombra/"?
> ?
> for (i in 1:length(noms[,1])){?
>   if (!is.na(noms[i,'cer_tit'])){?
>     pfrom <-
paste(ruta,"/",noms[i,'archivo'],sep='')?
>     pto <-
paste(ruta,salida,noms[i,'cer_tit'],'.pdf',sep='')?
>     if (!file.exists(pto)){?
>       file.copy(from = pfrom,to=pto)?
>     }?
>   }?
> }?
> ?
> #Ahora las polizas con subgrupos?
> noms <- renglones[!is.na
>
(renglones$poliza),c('archivo','poliza','subgrupo')]?
> ungrupo<-sqldf("select poliza,count(subgrupo) from noms group by 1 
having
> count(subgrupo) > 1 ")?
> noms<-sqldf("select * from noms where poliza in (select poliza from
> ungrupo) order by poliza")?
> length(noms$archivo)?
> salida <- "/Renombra/ConGrupos/"?
> ?
> for (i in 1:length(noms[,1])){?
>   if (!is.na(noms[i,'poliza'])){?
>     pfrom <-
paste(ruta,"/",noms[i,'archivo'],sep='')?
>     pto <-
>
paste(ruta,salida,noms[i,'poliza'],'_',noms[i,'subgrupo'],'.pdf',sep='')?
>     if (!file.exists(pto)){?
>       file.copy(from = pfrom,to=pto)?
>     }?
>   }?
> }?
> ?
> salida <- "/Renombra/Grupos/"?
> dirActual <- getwd()?
> polizas_con_grupos <- as.character(sqldf("select poliza from
> ungrupo")$poliza)?
> setwd(ruta)?
> for (policita in polizas_con_grupos){?
>   archivos <- as.character(sqldf(paste("select archivo from
renglones
> where poliza like
'%",policita,"%'",sep=''))$archivo)?
>   staple_pdf(input_files = archivos,output_filepath >
paste(ruta,salida,policita,".pdf",sep='')) ?
> }?
> setwd(dirActual)?
> ?
> #Ahora los PDf sin poliza?
> noms <-
renglones[is.na(renglones$poliza),c('archivo','poliza')]?
> salida <- "/renombra/SinPoliza/"?
> for (i in 1:length(noms[,1])){?
>   pfrom <-
paste(ruta,"/",noms[i,'archivo'],sep='')?
>   pto <- paste(ruta,salida,noms[i,'archivo'],sep='')?
>   if (!file.exists(pto)){?
>     file.copy(from = pfrom,to=pto)?
>   }?
> }?
> ?
> View(renglones)?
> write.csv(renglones, file='//
>
192.168.1.83/share/CERT/CERTIFICADOS_INDIVIDUALES_295_UMAM/Renombra/CERTIFICADOS_INDIVIDUALES_295_UMAM.csv')
> ?
> ?
> ?
> ?
>
>
>         [[alternative HTML version deleted]]
>
> _______________________________________________
> R-help-es mailing list
> R-help-es en r-project.org
> https://stat.ethz.ch/mailman/listinfo/r-help-es
>

-- 
Saludos,
Carlos Ortega
www.qualityexcellence.es

	[[alternative HTML version deleted]]

BERENICE DOMINGUEZ SANCHEZ

2019-Sep-24 14:50 UTC

head link

[R-es] Consulta

Emilio

Buen día, sí me manda un error muy específico: no reconoce una función de
pdftools.
> imagen <- image_read_pdf(path=paste(ruta,"/",nombre,".pdf",sep=""))
Error in normalizePath(path.expand(path), winslash, mustWork) :
  path[1]="C:\Users\bdominguez\Documents\H0711\Bond\1907\2\.pdf": El sistema no puede encontrar el archivo especificado
> prueba <-image_ocr(imagen, language = 'eng')
Error in assert_image(image) : object 'imagen' not found
> lineas<-unlist(str_split(prueba,pattern = "\n"))
Error in stri_split_regex(string, pattern, n = n, simplify = simplify,  :
  object 'prueba' not found
> lineasp<-unlist(str_split(prueba[2],pattern = "\r\n"))
Error in stri_split_regex(string, pattern, n = n, simplify = simplify,  :
  object 'prueba' not found

  Error in normalizePath(path.expand(path), winslash, mustWork) :
 
path[1]="C:\Users\bdominguez\Documents\H0711\Bond\1907\2\caratula_340007195-01-C_6158_22_07_2019_162916.pdf.pdf":
El sistema no puede encontrar el archivo especificado


Adjunto el archivo, respecto a la versión fue solo una hipótesis

Hice dos cosas:

Lo volví a instalar pero no tuve éxito, adjunto archivo.

Saludos.

________________________________
De: Emilio L. Cano <emilopezcano en gmail.com>
Enviado: lunes, 23 de septiembre de 2019 11:56 p. m.
Para: BERENICE DOMINGUEZ SANCHEZ <ds_bere en hotmail.com>
CC: Lista R <r-help-es en r-project.org>
Asunto: Re: [R-es] Consulta

Hola Berenice,

¿Qué quires decir con que no reconoce el paquete? ¿Te da algún mensaje de error?
No sé si has probado a instalar de nuevo el paquete, si no hazlo.

Para poder reproducir el error con tu código haría falta alguno de los pdfs que
utilizas (puedes compartir un enlace a dropbox o similar).

Un saludo,
Emilio
> El 24 sept 2019, a las 1:49, BERENICE DOMINGUEZ SANCHEZ <ds_bere en
hotmail.com> escribió:
>
> Buenas tarde a todo en s:
>
> Tenia la versión de R 3.6 y utilizaba la paquetería de pdftools para
extraer información de archivos en pdf actualice la versión 3.6.1 y ya no
reconoce la paquetería alguien que me pueda ayudar. Prácticamente no reconoce
las funciones de pdftools
>
> library(pdftools)
> library(stringr)?
> library(NLP)?
> library(tm)?
> library(tesseract)?
> library(magick)?
> install.packages("magick")?
> install.packages("pdftools")?
> ?
> txt <- system.file("texts", "txt", package =
"tm")?
> ?
> rfc_rg <- "([A-Z]{3,})([0-9]{6})([A-Z]|[0-9]){0,3}"?
> #poliza_rg <-
"(34|36|37|39)(ME|MEC|CH|MB|TF|GI|VE|TS|IM|ER|VE)*([0-9]{6,})[-]([0-9]){2}[-][A-Z]"?
> poliza_rg <-
"(ME|CH|MB|TF|GI|gi|VE|TS|IM|ER|VE)*([0-9]{8,})[-]([0-9]){2}"?
> registro_rg <- "(CNSF-H0711-)([0-9]{4})[- ]([0-9]){4}"?
> subgrupo_rg <- "_([0-9]){1,3}."?
> mon_rg <- "SMGM|UMAM|MN"?
> ?
> ?
> ruta <- 'C:/Users/bdominguez/Documents/H0711/Bond/1909/'?
> archivos<-list.files(path=ruta,pattern = '*.pdf')?
> ?
> ?
> imagen <-
image_read_pdf(path=paste(ruta,"/",nombre,".pdf",sep=""))?
> prueba <-image_ocr(imagen, language = 'eng')?
> lineas<-unlist(str_split(prueba,pattern = "\n"))?
> lineasp<-unlist(str_split(prueba[2],pattern = "\r\n"))?
> ?
> newnom <- NULL?
> renglones <- NULL?
> for (nombre in archivos){?
>  subgrupo <- str_extract(str_extract(nombre,pattern =
subgrupo_rg),pattern = "[0-9]{1,3}")?
>  imagen <-
image_read_pdf(path=paste(ruta,"/",nombre,".pdf",sep=""))?
>  prueba <-image_ocr(imagen, language = 'eng')?
>  lineas<-unlist(str_split(prueba,pattern = "\n"))?
>  poliza <- NULL?
>  poliza<-str_extract(lineas[1],poliza_rg)?
>  newnom <-
c(newnom,paste(poliza[1],substr(nombre,5,6),".pdf",sep=''))?
>  ?
>  registro <- NULL?
>  registro<-str_extract(lineas[49],registro_rg)?
>  ?
>  rfc <- NULL?
>  rfc <- str_extract(lineas[5],rfc_rg)?
>  ?
>  ?
>  #lineasnew<-unlist(str_split(lineas[2],pattern = "\r\n"))?
>  #lineasdosnew<-unlist(str_split(lineas[1],pattern = "\r\n"))?
>  ?
>  cobertura <- NA?
>  extranjera <- NA?
>  suma_str   <- NA?
>  deducible_str <- NA?
>  ?
>  suma <- NA?
>  coaseguro <- NA?
>  deducible <- NA?
>  tope <- NA?
>  mon <- NA?
>  mondedu <- NA?
>  ?
>  cobertura  <- grep("Cobertura en el
Extranjero",lineas,value=TRUE)?
>  extranjera <- grep("Emergencia en el
Extranjero",lineas,value=TRUE)?
>  suma_str   <- grep("SUMA ASEGURADA:",lineas,value=TRUE)?
>  deducible_str   <- grep("DEDUCIBLE:",lineas,value=TRUE)?
>  sumacob <- NA?
>  sumaext <- NA?
>  ?
>  pprimaria <- grep("Numero de Póliza:", lineas, value = TRUE)?
>  dnprimariaa <- grep("Nombre de la Aseguradora Primaria:",
lineas, value = TRUE)?
>  ?
>  #cer<- grep("Certificado No. ",lineas, value=TRUE)?
>  #ntit<- grep("Ramo", lineas, value=TRUE)?
>  ?
>  sumacob<-as.numeric(str_extract(cobertura[1],pattern =
"[0-9]{1,}"))?
>  if (length(sumacob)==0){?
>    sumacob = NA?
>  }?
>  ?
>  sumaext<-as.numeric(str_extract(extranjera[17],pattern =
"[0-9]{1,}"))?
>  if (length(sumaext)==0){?
>    sumaext = NA?
>  }?
>  valores <- NULL?
>  monedas <- NULL?
>  valores <- str_extract_all(suma_str[17],pattern =
"[0-9]{0,3},*[0-9]{0,3},*[0-9]{1,3}(.[0-9]{1,}){0,1}",simplify=TRUE)?
>  monedas <- str_extract(suma_str,pattern = mon_rg)?
>  if (length(valores[1])==0){?
>    suma = NA?
>    mon = NA?
>  }else{?
>    suma = as.numeric(gsub(pattern = ",*",replacement =
"",valores[1]))?
>    mon <- as.character(monedas[1])?
>  }?
>  ?
>  if (length(valores[2])==0){?
>    coaseguro = NA?
>  }else{?
>    coaseguro = as.numeric(valores[2])?
>  }?
>  valores <- NULL?
>  valores <- str_extract_all(deducible_str[1],pattern =
"[0-9]{0,3},*[0-9]{0,3},*[0-9]{1,3}(.[0-9]{1,}){0,1}",simplify=TRUE)?
>  ?
>  if (length(valores[1])==0){?
>    deducible <- NA?
>  }else{?
>    deducible <- as.numeric(gsub(pattern = ",",replacement =
"",valores[1]))?
>  }?
>  ?
>  monedas <- NULL  ?
>  monedas <- str_extract(deducible_str[1],pattern = mon_rg)?
>  ?
>  if (length(monedas)==0){?
>    mondedu <- NA?
>  }else{?
>    mondedu <- monedas?
>  }?
>  ?
>  ?
>  if (length(valores[2])==0){?
>    tope = NA?
>  }else{?
>    tope = as.numeric(gsub(pattern = ",",replacement =
"",valores[2]))?
>  }?
>  ?
>  renglon <-
data.frame(archivo=nombre,poliza=as.character(poliza[1]),cobertura=sumacob,emergencia=sumaext,registro=registro[1],suma=suma,coaseguro=coaseguro,deducible=deducible,tope=tope,rfc=rfc,mon=mon,mondedu=mondedu,subgrupo=subgrupo,
cert=as.character(cer[1]), cer_tit=as.character(lineasdos[14]),
titu=as.character(lineasdos[10]))?
>  renglones <- rbind(renglones,renglon)?
> }?
> ?
> # Con los datos del data frame renombra los archivos hay que crear los
subdirectorios?
> ?
> noms <- data.frame(archivo=archivos,poliza=newnom)?
> ?
> noms <-
renglones[!is.na(renglones$poliza),c('archivo','cer_tit')]?
> ungrupo<-sqldf("select poliza,count(cert) from noms group by 1 
having count(cert) <= 1 ")?
> noms<-sqldf("select * from noms where poliza in (select poliza from
ungrupo)")?
> length(noms$archivo)?
> salida <- "/renombra/"?
> ?
> for (i in 1:length(noms[,1])){?
>  if (!is.na(noms[i,'cer_tit'])){?
>    pfrom <-
paste(ruta,"/",noms[i,'archivo'],sep='')?
>    pto <-
paste(ruta,salida,noms[i,'cer_tit'],'.pdf',sep='')?
>    if (!file.exists(pto)){?
>      file.copy(from = pfrom,to=pto)?
>    }?
>  }?
> }?
> ?
> #Ahora las polizas con subgrupos?
> noms <-
renglones[!is.na(renglones$poliza),c('archivo','poliza','subgrupo')]?
> ungrupo<-sqldf("select poliza,count(subgrupo) from noms group by 1 
having count(subgrupo) > 1 ")?
> noms<-sqldf("select * from noms where poliza in (select poliza from
ungrupo) order by poliza")?
> length(noms$archivo)?
> salida <- "/Renombra/ConGrupos/"?
> ?
> for (i in 1:length(noms[,1])){?
>  if (!is.na(noms[i,'poliza'])){?
>    pfrom <-
paste(ruta,"/",noms[i,'archivo'],sep='')?
>    pto <-
paste(ruta,salida,noms[i,'poliza'],'_',noms[i,'subgrupo'],'.pdf',sep='')?
>    if (!file.exists(pto)){?
>      file.copy(from = pfrom,to=pto)?
>    }?
>  }?
> }?
> ?
> salida <- "/Renombra/Grupos/"?
> dirActual <- getwd()?
> polizas_con_grupos <- as.character(sqldf("select poliza from
ungrupo")$poliza)?
> setwd(ruta)?
> for (policita in polizas_con_grupos){?
>  archivos <- as.character(sqldf(paste("select archivo from
renglones where poliza like
'%",policita,"%'",sep=''))$archivo)?
>  staple_pdf(input_files = archivos,output_filepath =
paste(ruta,salida,policita,".pdf",sep='')) ?
> }?
> setwd(dirActual)?
> ?
> #Ahora los PDf sin poliza?
> noms <-
renglones[is.na(renglones$poliza),c('archivo','poliza')]?
> salida <- "/renombra/SinPoliza/"?
> for (i in 1:length(noms[,1])){?
>  pfrom <-
paste(ruta,"/",noms[i,'archivo'],sep='')?
>  pto <- paste(ruta,salida,noms[i,'archivo'],sep='')?
>  if (!file.exists(pto)){?
>    file.copy(from = pfrom,to=pto)?
>  }?
> }?
> ?
> View(renglones)?
> write.csv(renglones,
file='//192.168.1.83/share/CERT/CERTIFICADOS_INDIVIDUALES_295_UMAM/Renombra/CERTIFICADOS_INDIVIDUALES_295_UMAM.csv')?
> ?
> ?
> ?
>
>
>        [[alternative HTML version deleted]]
>
> _______________________________________________
> R-help-es mailing list
> R-help-es en r-project.org
> https://stat.ethz.ch/mailman/listinfo/r-help-es

	[[alternative HTML version deleted]]

Emilio L. Cano

2019-Sep-24 15:19 UTC

head link

[R-es] Consulta

Hola,
Estos mensajes de error no son por el paquete pdftools. Las funciones
image_read_pdf e image_ocr son del paquete magick. ¿Lo tienes instalado? ¿Lo
tienes cargado? Según el script sí, pero asegúrate de que library(magick) no da
ningún error.
La otra función “str_split” es del paquete stringr, pasa lo mismo: tiene que
estar cargado para poder utilizar sus funciones.

Saludos,
Emilio
> El 24 sept 2019, a las 16:20, BERENICE DOMINGUEZ SANCHEZ <ds_bere en
hotmail.com> escribió:
> 
> Emilio
> 
> Buen día, sí me manda un error muy específico que no reconoce una función
de pdftools
> 
> <image.png>
> 
> Adjunto el archivo, respecto a la versión fue solo una hipótesis
> 
> Hice dos cosas:
> 
> Lo volví a instalar pero no tuve éxito, adjunto archivo.
> 
> Saludos.
>   
> 
> De: Emilio L. Cano <emilopezcano en gmail.com <mailto:emilopezcano en
gmail.com>>
> Enviado: lunes, 23 de septiembre de 2019 11:56 p. m.
> Para: BERENICE DOMINGUEZ SANCHEZ <ds_bere en hotmail.com>
> CC: Lista R <r-help-es en r-project.org>
> Asunto: Re: [R-es] Consulta
>  
> Hola Berenice,
> 
> ¿Qué quieres decir con que no reconoce el paquete? ¿Te da algún mensaje de
error?
> No sé si has probado a instalar de nuevo el paquete, si no hazlo.
> 
> Para poder reproducir el error con tu código haría falta alguno de los pdfs
que utilizas (puedes compartir un enlace a dropbox o similar).
> 
> Un saludo,
> Emilio
> 
> > El 24 sept 2019, a las 1:49, BERENICE DOMINGUEZ SANCHEZ <ds_bere en
hotmail.com> escribió:
> > 
> > Buenas tardes a tod@s:
> > 
> > Tenía la versión de R 3.6 y utilizaba la paquetería de pdftools para
extraer información de archivos en pdf; actualicé a la versión 3.6.1 y ya no
reconoce la paquetería. ¿Alguien que me pueda ayudar? Prácticamente no reconoce
las funciones de pdftools.
> > 
> > library(pdftools)
> > library(stringr)?
> > library(NLP)?
> > library(tm)?
> > library(tesseract)?
> > library(magick)?
> > install.packages("magick")?
> > install.packages("pdftools")?
> > ?
> > txt <- system.file("texts", "txt", package =
"tm")?
> > ?
> > rfc_rg <- "([A-Z]{3,})([0-9]{6})([A-Z]|[0-9]){0,3}"?
> > #poliza_rg <-
"(34|36|37|39)(ME|MEC|CH|MB|TF|GI|VE|TS|IM|ER|VE)*([0-9]{6,})[-]([0-9]){2}[-][A-Z]"?
> > poliza_rg <-
"(ME|CH|MB|TF|GI|gi|VE|TS|IM|ER|VE)*([0-9]{8,})[-]([0-9]){2}"?
> > registro_rg <- "(CNSF-H0711-)([0-9]{4})[- ]([0-9]){4}"?
> > subgrupo_rg <- "_([0-9]){1,3}."?
> > mon_rg <- "SMGM|UMAM|MN"?
> > ?
> > ?
> > ruta <- 'C:/Users/bdominguez/Documents/H0711/Bond/1909/'?
> > archivos<-list.files(path=ruta,pattern = '*.pdf')?
> > ?
> > ?
> > imagen <-
image_read_pdf(path=paste(ruta,"/",nombre,".pdf",sep=""))?
> > prueba <-image_ocr(imagen, language = 'eng')?
> > lineas<-unlist(str_split(prueba,pattern = "\n"))?
> > lineasp<-unlist(str_split(prueba[2],pattern = "\r\n"))?
> > ?
> > newnom <- NULL?
> > renglones <- NULL?
> > for (nombre in archivos){?
> >  subgrupo <- str_extract(str_extract(nombre,pattern =
subgrupo_rg),pattern = "[0-9]{1,3}")?
> >  imagen <-
image_read_pdf(path=paste(ruta,"/",nombre,".pdf",sep=""))?
> >  prueba <-image_ocr(imagen, language = 'eng')?
> >  lineas<-unlist(str_split(prueba,pattern = "\n"))?
> >  poliza <- NULL?
> >  poliza<-str_extract(lineas[1],poliza_rg)?
> >  newnom <-
c(newnom,paste(poliza[1],substr(nombre,5,6),".pdf",sep=''))?
> >  ?
> >  registro <- NULL?
> >  registro<-str_extract(lineas[49],registro_rg)?
> >  ?
> >  rfc <- NULL?
> >  rfc <- str_extract(lineas[5],rfc_rg)?
> >  ?
> >  ?
> >  #lineasnew<-unlist(str_split(lineas[2],pattern =
"\r\n"))?
> >  #lineasdosnew<-unlist(str_split(lineas[1],pattern =
"\r\n"))?
> >  ?
> >  cobertura <- NA?
> >  extranjera <- NA?
> >  suma_str   <- NA?
> >  deducible_str <- NA?
> >  ?
> >  suma <- NA?
> >  coaseguro <- NA?
> >  deducible <- NA?
> >  tope <- NA?
> >  mon <- NA?
> >  mondedu <- NA?
> >  ?
> >  cobertura  <- grep("Cobertura en el
Extranjero",lineas,value=TRUE)?
> >  extranjera <- grep("Emergencia en el
Extranjero",lineas,value=TRUE)?
> >  suma_str   <- grep("SUMA ASEGURADA:",lineas,value=TRUE)?
> >  deducible_str   <- grep("DEDUCIBLE:",lineas,value=TRUE)?
> >  sumacob <- NA?
> >  sumaext <- NA?
> >  ?
> >  pprimaria <- grep("Numero de Póliza:", lineas, value =
TRUE)?
> >  dnprimariaa <- grep("Nombre de la Aseguradora
Primaria:", lineas, value = TRUE)?
> >  ?
> >  #cer<- grep("Certificado No. ",lineas, value=TRUE)?
> >  #ntit<- grep("Ramo", lineas, value=TRUE)?
> >  ?
> >  sumacob<-as.numeric(str_extract(cobertura[1],pattern =
"[0-9]{1,}"))?
> >  if (length(sumacob)==0){?
> >    sumacob = NA?
> >  }?
> >  ?
> >  sumaext<-as.numeric(str_extract(extranjera[17],pattern =
"[0-9]{1,}"))?
> >  if (length(sumaext)==0){?
> >    sumaext = NA?
> >  }?
> >  valores <- NULL?
> >  monedas <- NULL?
> >  valores <- str_extract_all(suma_str[17],pattern =
"[0-9]{0,3},*[0-9]{0,3},*[0-9]{1,3}(.[0-9]{1,}){0,1}",simplify=TRUE)?
> >  monedas <- str_extract(suma_str,pattern = mon_rg)?
> >  if (length(valores[1])==0){?
> >    suma = NA?
> >    mon = NA?
> >  }else{?
> >    suma = as.numeric(gsub(pattern = ",*",replacement =
"",valores[1]))?
> >    mon <- as.character(monedas[1])?
> >  }?
> >  ?
> >  if (length(valores[2])==0){?
> >    coaseguro = NA?
> >  }else{?
> >    coaseguro = as.numeric(valores[2])?
> >  }?
> >  valores <- NULL?
> >  valores <- str_extract_all(deducible_str[1],pattern =
"[0-9]{0,3},*[0-9]{0,3},*[0-9]{1,3}(.[0-9]{1,}){0,1}",simplify=TRUE)?
> >  ?
> >  if (length(valores[1])==0){?
> >    deducible <- NA?
> >  }else{?
> >    deducible <- as.numeric(gsub(pattern = ",",replacement
= "",valores[1]))?
> >  }?
> >  ?
> >  monedas <- NULL  ?
> >  monedas <- str_extract(deducible_str[1],pattern = mon_rg)?
> >  ?
> >  if (length(monedas)==0){?
> >    mondedu <- NA?
> >  }else{?
> >    mondedu <- monedas?
> >  }?
> >  ?
> >  ?
> >  if (length(valores[2])==0){?
> >    tope = NA?
> >  }else{?
> >    tope = as.numeric(gsub(pattern = ",",replacement =
"",valores[2]))?
> >  }?
> >  ?
> >  renglon <-
data.frame(archivo=nombre,poliza=as.character(poliza[1]),cobertura=sumacob,emergencia=sumaext,registro=registro[1],suma=suma,coaseguro=coaseguro,deducible=deducible,tope=tope,rfc=rfc,mon=mon,mondedu=mondedu,subgrupo=subgrupo,
cert=as.character(cer[1]), cer_tit=as.character(lineasdos[14]),
titu=as.character(lineasdos[10]))?
> >  renglones <- rbind(renglones,renglon)?
> > }?
> > ?
> > # Con los datos del data frame renombra los archivos hay que crear los
subdirectorios?
> > ?
> > noms <- data.frame(archivo=archivos,poliza=newnom)?
> > ?
> > noms <-
renglones[!is.na(renglones$poliza),c('archivo','cer_tit')]?
> > ungrupo<-sqldf("select poliza,count(cert) from noms group by 1
having count(cert) <= 1 ")?
> > noms<-sqldf("select * from noms where poliza in (select poliza
from ungrupo)")?
> > length(noms$archivo)?
> > salida <- "/renombra/"?
> > ?
> > for (i in 1:length(noms[,1])){?
> >  if (!is.na(noms[i,'cer_tit'])){?
> >    pfrom <-
paste(ruta,"/",noms[i,'archivo'],sep='')?
> >    pto <-
paste(ruta,salida,noms[i,'cer_tit'],'.pdf',sep='')?
> >    if (!file.exists(pto)){?
> >      file.copy(from = pfrom,to=pto)?
> >    }?
> >  }?
> > }?
> > ?
> > #Ahora las polizas con subgrupos?
> > noms <-
renglones[!is.na(renglones$poliza),c('archivo','poliza','subgrupo')]?
> > ungrupo<-sqldf("select poliza,count(subgrupo) from noms group
by 1  having count(subgrupo) > 1 ")?
> > noms<-sqldf("select * from noms where poliza in (select poliza
from ungrupo) order by poliza")?
> > length(noms$archivo)?
> > salida <- "/Renombra/ConGrupos/"?
> > ?
> > for (i in 1:length(noms[,1])){?
> >  if (!is.na(noms[i,'poliza'])){?
> >    pfrom <-
paste(ruta,"/",noms[i,'archivo'],sep='')?
> >    pto <-
paste(ruta,salida,noms[i,'poliza'],'_',noms[i,'subgrupo'],'.pdf',sep='')?
> >    if (!file.exists(pto)){?
> >      file.copy(from = pfrom,to=pto)?
> >    }?
> >  }?
> > }?
> > ?
> > salida <- "/Renombra/Grupos/"?
> > dirActual <- getwd()?
> > polizas_con_grupos <- as.character(sqldf("select poliza from
ungrupo")$poliza)?
> > setwd(ruta)?
> > for (policita in polizas_con_grupos){?
> >  archivos <- as.character(sqldf(paste("select archivo from
renglones where poliza like
'%",policita,"%'",sep=''))$archivo)?
> >  staple_pdf(input_files = archivos,output_filepath =
paste(ruta,salida,policita,".pdf",sep='')) ?
> > }?
> > setwd(dirActual)?
> > ?
> > #Ahora los PDf sin poliza?
> > noms <-
renglones[is.na(renglones$poliza),c('archivo','poliza')]?
> > salida <- "/renombra/SinPoliza/"?
> > for (i in 1:length(noms[,1])){?
> >  pfrom <-
paste(ruta,"/",noms[i,'archivo'],sep='')?
> >  pto <-
paste(ruta,salida,noms[i,'archivo'],sep='')?
> >  if (!file.exists(pto)){?
> >    file.copy(from = pfrom,to=pto)?
> >  }?
> > }?
> > ?
> > View(renglones)?
> > write.csv(renglones,
file='//192.168.1.83/share/CERT/CERTIFICADOS_INDIVIDUALES_295_UMAM/Renombra/CERTIFICADOS_INDIVIDUALES_295_UMAM.csv')?
> > ?
> > ?
> > ?
> > 
> > 
> >        [[alternative HTML version deleted]]
> > 
> > _______________________________________________
> > R-help-es mailing list
> > R-help-es en r-project.org
> > https://stat.ethz.ch/mailman/listinfo/r-help-es
<https://stat.ethz.ch/mailman/listinfo/r-help-es>
> 
> <caratula_34008612-01-B_6370_20_09_2019_141619.pdf>

	[[alternative HTML version deleted]]

BERENICE DOMINGUEZ SANCHEZ

2019-Sep-24 15:41 UTC

head link

[R-es] Consulta

Emilio

Ahora cuando quiero instalar los paquetes pdftools, magick y otros más me sale
el siguiente error:


WARNING: Rtools is required to build R packages but is not currently installed.
Please download and install the appropriate version of Rtools before proceeding:

https://cran.rstudio.com/bin/windows/Rtools/
Installing package into ‘C:/Users/bdominguez/Documents/R/win-library/3.6’
(as ‘lib’ is unspecified)
trying URL
'https://cran.rstudio.com/bin/windows/contrib/3.6/magick_2.2.zip'
Content type 'application/zip' length 20112845 bytes (19.2 MB)
downloaded 19.2 MB

package ‘magick’ successfully unpacked and MD5 sums checked

The downloaded binary packages are in
        C:\Users\bdominguez\AppData\Local\Temp\RtmpsrfGjz\downloaded_packages




________________________________
De: Emilio L. Cano <emilopezcano en gmail.com>
Enviado: martes, 24 de septiembre de 2019 10:19 a. m.
Para: BERENICE DOMINGUEZ SANCHEZ <ds_bere en hotmail.com>
CC: Lista R <r-help-es en r-project.org>
Asunto: Re: [R-es] Consulta

Hola,
Estos mensajes de error no son por el paquete pdftools. Las funciones
image_read_pdf e image_ocr son del paquete magick. ¿Lo tienes instalado? ¿Lo
tienes cargado? Según el script sí, pero asegúrate de que library(magick) no da
ningún error.
La otra función “str_split” es del paquete stringr, pasa lo mismo: tiene que
estar cargado para poder utilizar sus funciones.

Saludos,
Emilio

El 24 sept 2019, a las 16:20, BERENICE DOMINGUEZ SANCHEZ <ds_bere en
hotmail.com<mailto:ds_bere en hotmail.com>> escribió:

Emilio

Buen día, sí me manda un error muy específico que no reconoce una función de
pdftools

<image.png>

Adjunto el archivo, respecto a la versión fue solo una hipótesis

Hice dos cosas:

Lo volví a instalar pero no tuve éxito, adjunto archivo.

Saludos.


________________________________
De: Emilio L. Cano <emilopezcano en gmail.com<mailto:emilopezcano en
gmail.com>>
Enviado: lunes, 23 de septiembre de 2019 11:56 p. m.
Para: BERENICE DOMINGUEZ SANCHEZ <ds_bere en hotmail.com<mailto:ds_bere en
hotmail.com>>
CC: Lista R <r-help-es en r-project.org<mailto:r-help-es en
r-project.org>>
Asunto: Re: [R-es] Consulta

Hola Berenice,

¿Qué quieres decir con que no reconoce el paquete? ¿Te da algún mensaje de error?
No sé si has probado a instalar de nuevo el paquete, si no hazlo.

Para poder reproducir el error con tu código haría falta alguno de los pdfs que
utilizas (puedes compartir un enlace a dropbox o similar).

Un saludo,
Emilio
> El 24 sept 2019, a las 1:49, BERENICE DOMINGUEZ SANCHEZ <ds_bere en
hotmail.com<mailto:ds_bere en hotmail.com>> escribió:
>
> Buenas tardes a tod@s:
>
> Tenía la versión de R 3.6 y utilizaba la paquetería de pdftools para
extraer información de archivos en pdf; actualicé a la versión 3.6.1 y ya no
reconoce la paquetería. ¿Alguien que me pueda ayudar? Prácticamente no reconoce
las funciones de pdftools.
>
> library(pdftools)
> library(stringr)?
> library(NLP)?
> library(tm)?
> library(tesseract)?
> library(magick)?
> install.packages("magick")?
> install.packages("pdftools")?
> ?
> txt <- system.file("texts", "txt", package =
"tm")?
> ?
> rfc_rg <- "([A-Z]{3,})([0-9]{6})([A-Z]|[0-9]){0,3}"?
> #poliza_rg <-
"(34|36|37|39)(ME|MEC|CH|MB|TF|GI|VE|TS|IM|ER|VE)*([0-9]{6,})[-]([0-9]){2}[-][A-Z]"?
> poliza_rg <-
"(ME|CH|MB|TF|GI|gi|VE|TS|IM|ER|VE)*([0-9]{8,})[-]([0-9]){2}"?
> registro_rg <- "(CNSF-H0711-)([0-9]{4})[- ]([0-9]){4}"?
> subgrupo_rg <- "_([0-9]){1,3}."?
> mon_rg <- "SMGM|UMAM|MN"?
> ?
> ?
> ruta <- 'C:/Users/bdominguez/Documents/H0711/Bond/1909/'?
> archivos<-list.files(path=ruta,pattern = '*.pdf')?
> ?
> ?
> imagen <-
image_read_pdf(path=paste(ruta,"/",nombre,".pdf",sep=""))?
> prueba <-image_ocr(imagen, language = 'eng')?
> lineas<-unlist(str_split(prueba,pattern = "\n"))?
> lineasp<-unlist(str_split(prueba[2],pattern = "\r\n"))?
> ?
> newnom <- NULL?
> renglones <- NULL?
> for (nombre in archivos){?
>  subgrupo <- str_extract(str_extract(nombre,pattern =
subgrupo_rg),pattern = "[0-9]{1,3}")?
>  imagen <-
image_read_pdf(path=paste(ruta,"/",nombre,".pdf",sep=""))?
>  prueba <-image_ocr(imagen, language = 'eng')?
>  lineas<-unlist(str_split(prueba,pattern = "\n"))?
>  poliza <- NULL?
>  poliza<-str_extract(lineas[1],poliza_rg)?
>  newnom <-
c(newnom,paste(poliza[1],substr(nombre,5,6),".pdf",sep=''))?
>  ?
>  registro <- NULL?
>  registro<-str_extract(lineas[49],registro_rg)?
>  ?
>  rfc <- NULL?
>  rfc <- str_extract(lineas[5],rfc_rg)?
>  ?
>  ?
>  #lineasnew<-unlist(str_split(lineas[2],pattern = "\r\n"))?
>  #lineasdosnew<-unlist(str_split(lineas[1],pattern = "\r\n"))?
>  ?
>  cobertura <- NA?
>  extranjera <- NA?
>  suma_str   <- NA?
>  deducible_str <- NA?
>  ?
>  suma <- NA?
>  coaseguro <- NA?
>  deducible <- NA?
>  tope <- NA?
>  mon <- NA?
>  mondedu <- NA?
>  ?
>  cobertura  <- grep("Cobertura en el
Extranjero",lineas,value=TRUE)?
>  extranjera <- grep("Emergencia en el
Extranjero",lineas,value=TRUE)?
>  suma_str   <- grep("SUMA ASEGURADA:",lineas,value=TRUE)?
>  deducible_str   <- grep("DEDUCIBLE:",lineas,value=TRUE)?
>  sumacob <- NA?
>  sumaext <- NA?
>  ?
>  pprimaria <- grep("Numero de Póliza:", lineas, value = TRUE)?
>  dnprimariaa <- grep("Nombre de la Aseguradora Primaria:",
lineas, value = TRUE)?
>  ?
>  #cer<- grep("Certificado No. ",lineas, value=TRUE)?
>  #ntit<- grep("Ramo", lineas, value=TRUE)?
>  ?
>  sumacob<-as.numeric(str_extract(cobertura[1],pattern =
"[0-9]{1,}"))?
>  if (length(sumacob)==0){?
>    sumacob = NA?
>  }?
>  ?
>  sumaext<-as.numeric(str_extract(extranjera[17],pattern =
"[0-9]{1,}"))?
>  if (length(sumaext)==0){?
>    sumaext = NA?
>  }?
>  valores <- NULL?
>  monedas <- NULL?
>  valores <- str_extract_all(suma_str[17],pattern =
"[0-9]{0,3},*[0-9]{0,3},*[0-9]{1,3}(.[0-9]{1,}){0,1}",simplify=TRUE)?
>  monedas <- str_extract(suma_str,pattern = mon_rg)?
>  if (length(valores[1])==0){?
>    suma = NA?
>    mon = NA?
>  }else{?
>    suma = as.numeric(gsub(pattern = ",*",replacement =
"",valores[1]))?
>    mon <- as.character(monedas[1])?
>  }?
>  ?
>  if (length(valores[2])==0){?
>    coaseguro = NA?
>  }else{?
>    coaseguro = as.numeric(valores[2])?
>  }?
>  valores <- NULL?
>  valores <- str_extract_all(deducible_str[1],pattern =
"[0-9]{0,3},*[0-9]{0,3},*[0-9]{1,3}(.[0-9]{1,}){0,1}",simplify=TRUE)?
>  ?
>  if (length(valores[1])==0){?
>    deducible <- NA?
>  }else{?
>    deducible <- as.numeric(gsub(pattern = ",",replacement =
"",valores[1]))?
>  }?
>  ?
>  monedas <- NULL  ?
>  monedas <- str_extract(deducible_str[1],pattern = mon_rg)?
>  ?
>  if (length(monedas)==0){?
>    mondedu <- NA?
>  }else{?
>    mondedu <- monedas?
>  }?
>  ?
>  ?
>  if (length(valores[2])==0){?
>    tope = NA?
>  }else{?
>    tope = as.numeric(gsub(pattern = ",",replacement =
"",valores[2]))?
>  }?
>  ?
>  renglon <-
data.frame(archivo=nombre,poliza=as.character(poliza[1]),cobertura=sumacob,emergencia=sumaext,registro=registro[1],suma=suma,coaseguro=coaseguro,deducible=deducible,tope=tope,rfc=rfc,mon=mon,mondedu=mondedu,subgrupo=subgrupo,
cert=as.character(cer[1]), cer_tit=as.character(lineasdos[14]),
titu=as.character(lineasdos[10]))?
>  renglones <- rbind(renglones,renglon)?
> }?
> ?
> # Con los datos del data frame renombra los archivos hay que crear los
subdirectorios?
> ?
> noms <- data.frame(archivo=archivos,poliza=newnom)?
> ?
> noms <-
renglones[!is.na(renglones$poliza),c('archivo','cer_tit')]?
> ungrupo<-sqldf("select poliza,count(cert) from noms group by 1 
having count(cert) <= 1 ")?
> noms<-sqldf("select * from noms where poliza in (select poliza from
ungrupo)")?
> length(noms$archivo)?
> salida <- "/renombra/"?
> ?
> for (i in 1:length(noms[,1])){?
>  if (!is.na(noms[i,'cer_tit'])){?
>    pfrom <-
paste(ruta,"/",noms[i,'archivo'],sep='')?
>    pto <-
paste(ruta,salida,noms[i,'cer_tit'],'.pdf',sep='')?
>    if (!file.exists(pto)){?
>      file.copy(from = pfrom,to=pto)?
>    }?
>  }?
> }?
> ?
> #Ahora las polizas con subgrupos?
> noms <-
renglones[!is.na(renglones$poliza),c('archivo','poliza','subgrupo')]?
> ungrupo<-sqldf("select poliza,count(subgrupo) from noms group by 1 
having count(subgrupo) > 1 ")?
> noms<-sqldf("select * from noms where poliza in (select poliza from
ungrupo) order by poliza")?
> length(noms$archivo)?
> salida <- "/Renombra/ConGrupos/"?
> ?
> for (i in 1:length(noms[,1])){?
>  if (!is.na(noms[i,'poliza'])){?
>    pfrom <-
paste(ruta,"/",noms[i,'archivo'],sep='')?
>    pto <-
paste(ruta,salida,noms[i,'poliza'],'_',noms[i,'subgrupo'],'.pdf',sep='')?
>    if (!file.exists(pto)){?
>      file.copy(from = pfrom,to=pto)?
>    }?
>  }?
> }?
> ?
> salida <- "/Renombra/Grupos/"?
> dirActual <- getwd()?
> polizas_con_grupos <- as.character(sqldf("select poliza from
ungrupo")$poliza)?
> setwd(ruta)?
> for (policita in polizas_con_grupos){?
>  archivos <- as.character(sqldf(paste("select archivo from
renglones where poliza like
'%",policita,"%'",sep=''))$archivo)?
>  staple_pdf(input_files = archivos,output_filepath =
paste(ruta,salida,policita,".pdf",sep='')) ?
> }?
> setwd(dirActual)?
> ?
> #Ahora los PDf sin poliza?
> noms <-
renglones[is.na(renglones$poliza),c('archivo','poliza')]?
> salida <- "/renombra/SinPoliza/"?
> for (i in 1:length(noms[,1])){?
>  pfrom <-
paste(ruta,"/",noms[i,'archivo'],sep='')?
>  pto <- paste(ruta,salida,noms[i,'archivo'],sep='')?
>  if (!file.exists(pto)){?
>    file.copy(from = pfrom,to=pto)?
>  }?
> }?
> ?
> View(renglones)?
> write.csv(renglones,
file='//192.168.1.83/share/CERT/CERTIFICADOS_INDIVIDUALES_295_UMAM/Renombra/CERTIFICADOS_INDIVIDUALES_295_UMAM.csv')?
> ?
> ?
> ?
>
>
>        [[alternative HTML version deleted]]
>
> _______________________________________________
> R-help-es mailing list
> R-help-es en r-project.org<mailto:R-help-es en r-project.org>
> https://stat.ethz.ch/mailman/listinfo/r-help-es
<caratula_34008612-01-B_6370_20_09_2019_141619.pdf>


	[[alternative HTML version deleted]]

Apparently Analogous Threads

Search for more maybe matching threads

R help es - Sep 2019 - Consulta

[R-es] Consulta

[R-es] Consulta

[R-es] Consulta

[R-es] Consulta

[R-es] Consulta

[R-es] Consulta

Apparently Analogous Threads