Here is a start for two of them... PDF files are essentially programs written
in a page-description language derived from PostScript, and there are a lot of
ways to write a program that puts marks on a page, so this mess is actually not
as bad a result as you might have encountered.
# Inspect what extract_tables() returned: a list of data frames whose headers
# and column types differ from one element to the next.
str(IDTpdf)

# Column names shared by every cleaned table.
col_names <- c("SNo", "Scrip", "Symbol", "Leverage")

# Table 1: the first two rows hold the table's header text, not data.
DF <- IDTpdf[[1]][-(1:2), ]
names(DF) <- col_names
# Every column of this table was read as character; convert the numeric ones.
DF$SNo <- as.integer(DF$SNo)
DF$Leverage <- as.numeric(DF$Leverage)
# Drop the leftover row names (3, 4, ...) so the stacked result is numbered
# sequentially instead of carrying over stale labels.
rownames(DF) <- NULL
IDTpdf[[1]] <- DF

# Table 2: its first data row (S.No. 54) was consumed as the column header,
# so rename the columns and put that row back by hand.
DF <- IDTpdf[[2]]
names(DF) <- col_names
DF <- rbind(
  data.frame(
    SNo = 54L,  # integer literal, so SNo is not coerced to double by rbind()
    Scrip = "DIVIS LABORATORIES LTD",
    Symbol = "DIVISLAB",
    Leverage = 4.5
  ),
  DF
)
IDTpdf[[2]] <- DF

# Stack the two cleaned tables; rbind() matches data frame columns by name.
do.call(rbind, IDTpdf[1:2])
On January 8, 2023 12:59:58 AM PST, akshay kulkarni <akshay_e4 at
hotmail.com> wrote:
>dear members,
> I am extracting a pdf table into a data frame
from this URL:
>
>https://www.canmoney.in/pdf/INTRADAYLEVERAGE-20220531-latest.pdf
>
>I am using extract_tables() from the tabulizer package (it is archived, so I
have installed it from GitHub)
>
>IDTpdf <-
extract_tables("https://www.canmoney.in/pdf/INTRADAYLEVERAGE-20220531-latest.pdf",output="data.frame")
>
>But IDTpdf consists of four different dfs, and I want to collapse them into
one. The dput of IDTpdf:
>
>list(structure(list(SCRIPS.AVAILABLE.FOR.INTRADAY.WITH.LEVERAGES.PROVIDED.ON.THEM
= c("S.No.",
>"times)", "1", "2", "3",
"4", "5", "6", "7", "8",
"9", "10",
>"11", "12", "13", "14",
"15", "16", "17", "18", "19",
"20", "21",
>"22", "23", "24", "25",
"26", "27", "28", "29", "30",
"31", "32",
>"33", "34", "35", "36",
"37", "38", "39", "40", "41",
"42", "43",
>"44", "45", "46", "47",
"48", "49", "50", "51", "52",
"53"),
> X = c("Scrip Name", "", "ALKEM LABORATORIES
LTD.", "ATUL LTD",
> "ABB INDIA LIMITED", "AARTI INDUSTRIES LTD",
"ABBOTT INDIA LIMITED",
> "ADITYA BIRLA CAPITAL LTD.", "ADITYA BIRLA FASHION &
RT",
> "ACC LIMITED", "ADANI ENTERPRISES LIMITED",
"ADANI PORT & SEZ LTD",
> "AMARA RAJA BATTERIES LTD.", "ASTRAL LIMITED",
"AMBUJA CEMENTS LTD",
> "ALEMBIC PHARMA LTD", "APOLLO HOSPITALS ENTER. L",
"APOLLO TYRES LTD",
> "ASHOK LEYLAND LTD", "ASIAN PAINTS LIMITED",
"AU SMALL FINANCE BANK LTD",
> "AUROBINDO PHARMA LTD", "AXIS BANK LIMITED",
"BAJAJ AUTO LIMITED",
> "BAJAJ FINSERV LTD.", "BAJAJ FINANCE LIMITED",
"BALRAMPUR CHINI MILLS LTD",
> "BANDHAN BANK LIMITED", "BANK OF BARODA", "BATA
INDIA LTD",
> "BHARAT ELECTRONICS LTD", "BERGER PAINTS (I) LTD",
"BHARTI AIRTEL LIMITED",
> "BHEL", "BIOCON LIMITED.", "BOSCH
LIMITED", "BRITANNIA INDUSTRIES LTD",
> "BIRLASOFT LIMITED", "ZYDUS LIFESCIENCES LTD",
"CANARA BANK",
> "CAN FIN HOMES LTD", "CHAMBAL FERTILIZERS LTD",
"CHOLAMANDALAM IN & FIN CO",
> "CIPLA LTD", "COAL INDIA LTD", "COFORGE
LIMITED", "COLGATE PALMOLIVE LTD.",
> "CONTAINER CORP OF IND LTD", "COROMANDEL INTERNTL.
LTD",
> "CROMPT GREA CON ELEC LTD", "CITY UNION BANK LTD",
"CUMMINS INDIA LTD",
> "DABUR INDIA LTD", "DEEPAK NITRITE LTD", "DELTA
CORP LIMITED"
> ), X.1 = c("Symbol Series", "", "ALKEM",
"ATUL", "ABB", "AARTIIND",
> "ABBOTINDIA", "ABCAPITAL", "ABFRL",
"ACC", "ADANIENT", "ADANIPORTS",
> "AMARAJABAT", "ASTRAL", "AMBUJACEM",
"APLLTD", "APOLLOHOSP",
> "APOLLOTYRE", "ASHOKLEY", "ASIANPAINT",
"AUBANK", "AUROPHARMA",
> "AXISBANK", "BAJAJ-AUTO", "BAJAJFINSV",
"BAJFINANCE", "BALRAMCHIN",
> "BANDHANBNK", "BANKBARODA", "BATAINDIA",
"BEL", "BERGEPAINT",
> "BHARTIARTL", "BHEL", "BIOCON",
"BOSCHLTD", "BRITANNIA",
> "BSOFT", "ZYDUSLIFE", "CANBK",
"CANFINHOME", "CHAMBLFERT",
> "CHOLAFIN", "CIPLA", "COALINDIA",
"COFORGE", "COLPAL", "CONCOR",
> "COROMANDEL", "CROMPTON", "CUB",
"CUMMINSIND", "DABUR", "DEEPAKNTR",
> "DELTACORP"), X.2 = c("Leverage (in", "",
"4.5", "4.5", "4.5",
> "4.5", "4.5", "4.5", "4.5",
"4.5", "4", "4.5", "4.5",
"4.5",
> "4.5", "4.5", "4.5", "4.5",
"4.5", "4.5", "4.5", "4.5",
"4.5",
> "4.5", "4.5", "4.5", "4",
"4", "4.5", "4.5", "4.5",
"4.5",
> "4.5", "4", "4.5", "4.5",
"4.5", "4", "4.5", "4", "4.5",
> "4", "4", "4.5", "4.5",
"4", "4.5", "4.5", "4.5",
"4.5",
> "4.5", "4.5", "4.5", "4.5",
"4")), class = "data.frame", row.names = c(NA,
>-55L)), structure(list(X54 = 55:110, DIVI.S.LABORATORIES.LTD = c("DIXON
TECHNO (INDIA) LTD",
>"DLF LIMITED", "DR. REDDY S LABORATORIES", "ESCORTS
INDIA LTD",
>"EXIDE INDUSTRIES LTD", "FEDERAL BANK LTD",
"FIRSTSOURCE SOLU. LTD.",
>"GAIL (INDIA) LTD", "GLENMARK PHARMACEUTICALS",
"GMR INFRASTRUCTURE LTD.",
>"GUJ NAR VAL FER & CHEM L", "DALMIA BHARAT LIMITED",
"GODREJ CONSUMER PRODUCTS",
>"GRANULES INDIA LIMITED", "GRASIM INDUSTRIES LTD",
"GUJARAT STATE PETRO LTD",
>"GUJARAT GAS LIMITED", "HINDUSTAN AERONAUTICS LTD",
"HAVELLS INDIA LIMITED",
>"HCL TECHNOLOGIES LTD", "HDFC LTD", "HDFC AMC
LIMITED", "HDFC BANK LTD",
>"HDFC LIFE INS CO LTD", "HERO MOTOCORP LIMITED",
"HINDALCO INDUSTRIES LTD",
>"HINDUSTAN COPPER LTD", "HONEYWELL AUTOMATION IND",
"ICICI BANK LTD.",
>"ICICI LOMBARD GIC LIMITED", "ICICI PRU LIFE INS CO
LTD", "IDFC LIMITED",
>"IDFC FIRST BANK LIMITED", "INDIAN ENERGY EXC LTD",
"INDRAPRASTHA GAS LTD",
>"THE INDIAN HOTELS CO. LTD", "THE INDIA CEMENTS
LIMITED", "INDIAMART INTERMESH LTD",
>"INTERGLOBE AVIATION LTD", "INDUSIND BANK LIMITED",
"INDUS TOWERS LIMITED",
>"INFOSYS LIMITED", "INTELLECT DESIGN ARENA",
"INDIAN OIL CORP LTD",
>"IPCA LABORATORIES LTD", "INDIAN RAIL TOUR CORP LTD",
"ITC LTD",
>"JINDAL STEEL & POWER LTD", "JSW STEEL LIMITED",
"JUBILANT FOODWORKS LTD",
>"KOTAK MAHINDRA BANK LTD", "L&T FINANCE HOLDINGS
LTD", "DR. LAL PATH LABS LTD.",
>"LAURUS LABS LIMITED", "LIC HOUSING FINANCE LTD",
"LARSEN & TOUBRO LTD."
>), DIVISLAB = c("DIXON", "DLF", "DRREDDY",
"ESCORTS", "EXIDEIND",
>"FEDERALBNK", "FSL", "GAIL",
"GLENMARK", "GMRINFRA", "GNFC",
>"DALBHARAT", "GODREJCP", "GRANULES",
"GRASIM", "GSPL", "GUJGASLTD",
>"HAL", "HAVELLS", "HCLTECH", "HDFC",
"HDFCAMC", "HDFCBANK", "HDFCLIFE",
>"HEROMOTOCO", "HINDALCO", "HINDCOPPER",
"HONAUT", "ICICIBANK",
>"ICICIGI", "ICICIPRULI", "IDFC",
"IDFCFIRSTB", "IEX", "IGL",
>"INDHOTEL", "INDIACEM", "INDIAMART",
"INDIGO", "INDUSINDBK",
>"INDUSTOWER", "INFY", "INTELLECT",
"IOC", "IPCALAB", "IRCTC",
>"ITC", "JINDALSTEL", "JSWSTEEL",
"JUBLFOOD", "KOTAKBANK", "L&TFH",
>"LALPATHLAB", "LAURUSLABS", "LICHSGFIN",
"LT"), X4.5 = c(4.5,
>4, 4.5, 4.5, 4.5, 4.5, 4, 4.5, 4.5, 4.5, 4, 4.5, 4.5, 4.5, 4.5,
>4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4, 4.5,
>4.5, 4.5, 4.5, 4, 4.5, 4.5, 4.5, 4.5, 4, 4, 4.5, 4, 4, 4.5, 4,
>4.5, 4.5, 4, 4.5, 4, 4.5, 4.5, 4.5, 4, 4.5, 4.5, 4.5, 4.5)), class =
"data.frame", row.names = c(NA,
>-56L)), structure(list(X111 = 112:167, L.T.INFOTECH.LIMITED =
c("L&T TECHNOLOGY SER. LTD.",
>"LUPIN LIMITED", "MAHINDRA & MAHINDRA LTD",
"M&M FIN. SERVICES LTD",
>"MANAPPURAM FINANCE LTD", "MARICO LIMITED", "MARUTI
SUZUKI INDIA LTD.",
>"UNITED SPIRITS LIMITED", "MULTI COMMODITY EXCHANGE",
"MAX FINANCIAL SERV LTD",
>"MAHANAGAR GAS LTD.", "MINDTREE LIMITED",
"MOTHERSON SUMI SYSTEMS LT",
>"MPHASIS LIMITED", "MRF LTD", "MUTHOOT FINANCE
LIMITED", "NATIONAL ALUMINIUM CO LTD",
>"INFO EDGE (I) LTD", "NAVIN FLUORINE INT. LTD",
"NBCC (INDIA) LIMITED",
>"NMDC LTD.", "NTPC LTD", "OBEROI REALTY
LIMITED", "ORACLE FIN SERV SOFT LTD.",
>"OIL AND NATURAL GAS CORP.", "PAGE INDUSTRIES LTD",
"PIRAMAL ENTERPRISES LTD",
>"PERSISTENT SYSTEMS LTD", "PETRONET LNG LIMITED",
"POWER FIN CORP LTD.",
>"PIDILITE INDUSTRIES LTD", "PI INDUSTRIES LTD",
"PUNJAB NATIONAL BANK",
>"POLYCAB INDIA LIMITED", "POWER GRID CORP. LTD.",
"PVR LIMITED",
>"RAIN INDUSTRIES LIMITED", "THE RAMCO CEMENTS LIMITED",
"RBL BANK LIMITED",
>"REC LIMITED", "RELIANCE INDUSTRIES LTD", "STEEL
AUTHORITY OF INDIA",
>"SBI CARDS & PAY SER LTD", "SBI LIFE INSURANCE CO
LTD", "STATE BANK OF INDIA",
>"SHREE CEMENT LIMITED", "SIEMENS LTD", "SRF
LTD", "SHRIRAM TRANSPORT FIN CO.",
>"STRIDES PHARMA SCI LTD", "SUN PHARMACEUTICAL IND L",
"SUN TV NETWORK LIMITED",
>"SYNGENE INTERNATIONAL LTD", "TATA CHEMICALS LTD",
"TATA COMMUNICATIONS LTD",
>"TATA CONSUMER PRODUCT LTD"), LTI = c("LTTS",
"LUPIN", "M&M",
>"M&MFIN", "MANAPPURAM", "MARICO",
"MARUTI", "MCDOWELL-N", "MCX",
>"MFSL", "MGL", "MINDTREE",
"MOTHERSUMI", "MPHASIS", "MRF",
"MUTHOOTFIN",
>"NATIONALUM", "NAUKRI", "NAVINFLUOR",
"NBCC", "NMDC", "NTPC",
>"OBEROIRLTY", "OFSS", "ONGC",
"PAGEIND", "PEL", "PERSISTENT",
>"PETRONET", "PFC", "PIDILITIND",
"PIIND", "PNB", "POLYCAB", "POWERGRID",
>"PVR", "RAIN", "RAMCOCEM",
"RBLBANK", "RECLTD", "RELIANCE", "SAIL",
>"SBICARD", "SBILIFE", "SBIN",
"SHREECEM", "SIEMENS", "SRF",
"SRTRANSFIN",
>"STAR", "SUNPHARMA", "SUNTV",
"SYNGENE", "TATACHEM", "TATACOMM",
>"TATACONSUM"), X4.5 = c(4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5,
>4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4, 4.5,
>4.5, 4.5, 4.5, 4.5, 4.5, 4, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5,
>4.5, 4.5, 4, 4.5, 4, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5,
>4.5, 4, 4, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5)), class = "data.frame",
row.names = c(NA,
>-56L)), structure(list(X168 = 169:198, TATA.MOTORS.LIMITED = c("TATA
POWER CO LTD",
>"TATA STEEL LIMITED", "TATA CONSULTANCY SERV LT",
"TECH MAHINDRA LIMITED",
>"TITAN COMPANY LIMITED", "TORRENT PHARMACEUTICALS L",
"TORRENT POWER LTD",
>"TRENT LTD", "TVS MOTOR COMPANY LTD", "UNITED
BREWERIES LTD",
>"ULTRATECH CEMENT LIMITED", "UPL LIMITED", "VEDANTA
LIMITED",
>"VOLTAS LTD", "WHIRLPOOL OF INDIA LTD", "WIPRO
LTD", "ZEE ENTERTAINMENT ENT LTD",
>"BALKRISHNA IND. LTD", "BHARAT FORGE LTD", "BHARAT
PETROLEUM CORP LT",
>"EICHER MOTORS LTD", "GODREJ PROPERTIES LTD",
"HINDUSTAN PETROLEUM CORP",
>"JK CEMENT LIMITED", "NESTLE INDIA LIMITED",
"METROPOLIS HEALTHCARE LTD",
>"HINDUSTAN UNILEVER LTD.", "VODAFONE IDEA LIMITED",
"NIPPON L I A M LTD",
>"INDIABULLS HSG FIN LTD"), TATAMOTORS = c("TATAPOWER",
"TATASTEEL",
>"TCS", "TECHM", "TITAN",
"TORNTPHARM", "TORNTPOWER", "TRENT",
>"TVSMOTOR", "UBL", "ULTRACEMCO",
"UPL", "VEDL", "VOLTAS", "WHIRLPOOL",
>"WIPRO", "ZEEL", "BALKRISIND",
"BHARATFORG", "BPCL", "EICHERMOT",
>"GODREJPROP", "HINDPETRO", "JKCEMENT",
"NESTLEIND", "METROPOLIS",
>"HINDUNILVR", "IDEA", "NAM-INDIA",
"IBULHSGFIN"), X4 = c(4.5,
>4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5,
>4.5, 4.5, 3, 4.5, 4.5, 4.5, 4.5, 4, 4.5, 4.5, 4.5, 4.5, 4.5,
>4.5, 4.5, 3)), class = "data.frame", row.names = c(NA, -30L)))
>
>unlist(IDTpdf) is not working. It makes IDTpdf a lot messier...
>
>I want IDTpdf to be one data frame combining all the four pages in the above
url...
>
>Any help will be greatly appreciated...
>
>Thanking you,
>Yours sincerely,
>AKSHAY M KULKARNI
>
>
> [[alternative HTML version deleted]]
>
>______________________________________________
>R-help at r-project.org mailing list -- To UNSUBSCRIBE and more, see
>https://stat.ethz.ch/mailman/listinfo/r-help
>PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
>and provide commented, minimal, self-contained, reproducible code.
--
Sent from my phone. Please excuse my brevity.