Dear all, I have 3-hourly temperature data from 1970-2010 for 122 cities in the US. I would like to bin this data by city-year-week. My idea is if the temperature for a particular city in a given week falls within a given range (-17.78 & -12.22), (-12.22 & -6.67), ... (37.78 & 43.33), then the corresponding bin would have a value of 1 and 0 otherwise. The data looks like this. Basically, I need to generate a dummy variable for each temperature range. Any help will be greatly appreciated. tmp2<- dput(head(tmp1,10))> structure(list(yearday = c(1970001L, 1970001L, 1970001L, 1970001L, > 1970001L, 1970001L, 1970001L, 1970001L, 1970001L, 1970001L), > City = structure(1:10, .Label = c("AKRON", "ALBANY", "ALBUQUERQUE", > "ALLENTOWN", "ATLANTA", "AUSTIN", "BALTIMORE", "BATON ROUGE", > "BERKELEY", "BIRMINGHAM", "BOISE", "BOSTON", "BRIDGEPORT", > "BUFFALO", "CAMBRIDGE", "CAMDEN", "CANTON", "CHARLOTTE", > "CHATTANOOGA", "CHICAGO", "CINCINNATI", "CLEVELAND", "COLORADO > SPRINGS", > "COLUMBUS", "CORPUS CHRISTI", "DALLAS", "DAYTON", "DENVER", > "DES MOINES", "DETROIT", "DULUTH", "EL PASO", "ELIZABETH", > "ERIE", "EVANSVILLE", "FALL RIVER", "FLINT", "FORT WAYNE", > "FRESNO", "FT WORTH", "GARY", "GLENDALE", "GRAND RAPIDS", > "HARTFORD", "HONOLULU", "HOUSTON", "INDIANAPOLIS", "JACKSONVILLE", > "JERSEY CITY", "KANSAS CITY", "KANSAS ITY", "KNOXVILLE", > "Lansing ", "LAS VEGAS", "LEXINGTON", "LINCOLN", "LITTLE ROCK", > "LONG BEACH", "LOS ANGELES", "LOUISVILLE", "LOWELL", "LYNN", > "MADISON", "MEMPHIS", "MIAMI", "MILWAUKEE", "MINNEAPOLIS", > "MOBILE", "MONTGOMERY", "NASHVILLE", "NEW BEDFORD", "NEW HAVEN", > "NEW ORLEANS", "NEW YORK CITY", "NEWARK", "NORFOLK", "OAKLAND", > "OGDEN", "OKLAHOMA CITY", "OMAHA", "PASADENA", "PATERSON", > "PEORIA", "PHILADELPHIA", "PHOENIX", "PITTSBURG", "PORTLAND", > "PROVIDENCE", "PUEBLO", "READING", "RICHMOND", "ROCHESTER", > "ROCKFORD", "SACRAMENTO", "SALT LAKE CITY", "SAN ANTONIO", > "SAN CRUZ", "SAN DIEGO", "SAN FRANCISCO", "SAN JOSE", "SAVANNAH", > 
"SCHENECTADY", "SCRANTON", "SEATTLE", "SHREVEPORT", "SOMERVILLE", > "SOUTH BEND", "SPOKANE", "SPRINGFIELD", "ST LOUIS", "ST PAUL", > "ST PETERSBURG", "SYRACUSE", "TACOMA", "TAMPA", "TOLEDO", > "TRENTON", "TUCSON", "TULSA", "UTICA", "WASHINGTON", "WATERBURY", > "WICHITA", "WILMINGTON", "WORCESTER", "YONKERS", "YOUNGSTOWN" > ), class = "factor"), cell_number = c(17379L, 17027L, 19514L, > 17745L, 20256L, 21323L, 18104L, 21329L, 18779L, 20254L), > longitude = c(-81.519005, -73.756232, -106.609991, -75.490183, > -84.387982, -97.743061, -76.612189, -91.14032, -121.635963, > -86.80249), latitude = c(41.081445, 42.652579, 35.110703, > 40.608431, 33.748995, 30.267153, 39.290385, 30.458283, 37.871744, > 33.520661), State = structure(c(29L, 28L, 27L, 32L, 10L, > 35L, 19L, 17L, 4L, 1L), .Label = c(" ALA", " ARIZ", " ARK", > " CAL", " COLO", " CONN", " DC", " DEL", " FLA", " GA", " HAWAII", > " ILL", " IND", " IOWA", " KANS", " KY", " LA", " MASS", > " MD", " MICH", " MINN", " MO", " NC", " NEBR", " NEV", " NJ", > " NM", " NY", " OHIO", " OKLA", " ORE", " PA", " RI", " TENN", > " TEX", " UTAH", " VA", " WASH", " WIS", "CAL", "CONN", "IDAH", > "KY", "MASS"), class = "factor"), avsft = c(-7.81, -16.06, > -7.71999999999997, -1.88999999999999, 2.90000000000003, 5.12, > -5.02999999999997, 9.33000000000004, 15.08, 2.89000000000004 > ), year = c(1970L, 1970L, 1970L, 1970L, 1970L, 1970L, 1970L, > 1970L, 1970L, 1970L), day = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, > 1L, 1L, 1L), hour = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, > 0L), yearweek = c(197001L, 197001L, 197001L, 197001L, 197001L, > 197001L, 197001L, 197001L, 197001L, 197001L), week = c(1L, > 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L)), .Names = c("yearday", > "City", "cell_number", "longitude", "latitude", "State", "avsft", > "year", "day", "hour", "yearweek", "week"), row.names = c(NA, > 10L), class = "data.frame")Sincerely, Shouro [[alternative HTML version deleted]]
1. Posting in HTML largely negated your ability to provide data through dput(). Follow the posting guide and post in PLAIN TEXT only, please. 2. See ?cut . I think this will at least get you started. Cheers, Bert Bert Gunter "Data is not information. Information is not knowledge. And knowledge is certainly not wisdom." -- Clifford Stoll On Thu, Sep 10, 2015 at 3:28 PM, Shouro Dasgupta <shouro at gmail.com> wrote: > Dear all, > > I have 3-hourly temperature data from 1970-2010 for 122 cities in the US. I > would like to bin this data by city-year-week. My idea is if the > temperature for a particular city in a given week falls within a given > range (-17.78 & -12.22), (-12.22 & -6.67), ... (37.78 & 43.33), then the > corresponding bin would have a value of 1 and 0 otherwise. > > The data looks like this. Basically, I need to generate a dummy variable > for each temperature range. Any help will be greatly appreciated. > > tmp2<- dput(head(tmp1,10)) >> structure(list(yearday = c(1970001L, 1970001L, 1970001L, 1970001L, >> 1970001L, 1970001L, 1970001L, 1970001L, 1970001L, 1970001L), >> City = structure(1:10, .Label = c("AKRON", "ALBANY", "ALBUQUERQUE", >> "ALLENTOWN", "ATLANTA", "AUSTIN", "BALTIMORE", "BATON ROUGE", >> "BERKELEY", "BIRMINGHAM", "BOISE", "BOSTON", "BRIDGEPORT", >> "BUFFALO", "CAMBRIDGE", "CAMDEN", "CANTON", "CHARLOTTE", >> "CHATTANOOGA", "CHICAGO", "CINCINNATI", "CLEVELAND", "COLORADO >> SPRINGS", >> "COLUMBUS", "CORPUS CHRISTI", "DALLAS", "DAYTON", "DENVER", >> "DES MOINES", "DETROIT", "DULUTH", "EL PASO", "ELIZABETH", >> "ERIE", "EVANSVILLE", "FALL RIVER", "FLINT", "FORT WAYNE", >> "FRESNO", "FT WORTH", "GARY", "GLENDALE", "GRAND RAPIDS", >> "HARTFORD", "HONOLULU", "HOUSTON", "INDIANAPOLIS", "JACKSONVILLE", >> "JERSEY CITY", "KANSAS CITY", "KANSAS ITY", "KNOXVILLE", >> "Lansing ", "LAS VEGAS", "LEXINGTON", "LINCOLN", "LITTLE ROCK", >> "LONG BEACH", "LOS ANGELES", "LOUISVILLE", "LOWELL", "LYNN", >> "MADISON", "MEMPHIS", "MIAMI", "MILWAUKEE", "MINNEAPOLIS", 
>> "MOBILE", "MONTGOMERY", "NASHVILLE", "NEW BEDFORD", "NEW HAVEN", >> "NEW ORLEANS", "NEW YORK CITY", "NEWARK", "NORFOLK", "OAKLAND", >> "OGDEN", "OKLAHOMA CITY", "OMAHA", "PASADENA", "PATERSON", >> "PEORIA", "PHILADELPHIA", "PHOENIX", "PITTSBURG", "PORTLAND", >> "PROVIDENCE", "PUEBLO", "READING", "RICHMOND", "ROCHESTER", >> "ROCKFORD", "SACRAMENTO", "SALT LAKE CITY", "SAN ANTONIO", >> "SAN CRUZ", "SAN DIEGO", "SAN FRANCISCO", "SAN JOSE", "SAVANNAH", >> "SCHENECTADY", "SCRANTON", "SEATTLE", "SHREVEPORT", "SOMERVILLE", >> "SOUTH BEND", "SPOKANE", "SPRINGFIELD", "ST LOUIS", "ST PAUL", >> "ST PETERSBURG", "SYRACUSE", "TACOMA", "TAMPA", "TOLEDO", >> "TRENTON", "TUCSON", "TULSA", "UTICA", "WASHINGTON", "WATERBURY", >> "WICHITA", "WILMINGTON", "WORCESTER", "YONKERS", "YOUNGSTOWN" >> ), class = "factor"), cell_number = c(17379L, 17027L, 19514L, >> 17745L, 20256L, 21323L, 18104L, 21329L, 18779L, 20254L), >> longitude = c(-81.519005, -73.756232, -106.609991, -75.490183, >> -84.387982, -97.743061, -76.612189, -91.14032, -121.635963, >> -86.80249), latitude = c(41.081445, 42.652579, 35.110703, >> 40.608431, 33.748995, 30.267153, 39.290385, 30.458283, 37.871744, >> 33.520661), State = structure(c(29L, 28L, 27L, 32L, 10L, >> 35L, 19L, 17L, 4L, 1L), .Label = c(" ALA", " ARIZ", " ARK", >> " CAL", " COLO", " CONN", " DC", " DEL", " FLA", " GA", " HAWAII", >> " ILL", " IND", " IOWA", " KANS", " KY", " LA", " MASS", >> " MD", " MICH", " MINN", " MO", " NC", " NEBR", " NEV", " NJ", >> " NM", " NY", " OHIO", " OKLA", " ORE", " PA", " RI", " TENN", >> " TEX", " UTAH", " VA", " WASH", " WIS", "CAL", "CONN", "IDAH", >> "KY", "MASS"), class = "factor"), avsft = c(-7.81, -16.06, >> -7.71999999999997, -1.88999999999999, 2.90000000000003, 5.12, >> -5.02999999999997, 9.33000000000004, 15.08, 2.89000000000004 >> ), year = c(1970L, 1970L, 1970L, 1970L, 1970L, 1970L, 1970L, >> 1970L, 1970L, 1970L), day = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, >> 1L, 1L, 1L), hour = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
>> 0L), yearweek = c(197001L, 197001L, 197001L, 197001L, 197001L, >> 197001L, 197001L, 197001L, 197001L, 197001L), week = c(1L, >> 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L)), .Names = c("yearday", >> "City", "cell_number", "longitude", "latitude", "State", "avsft", >> "year", "day", "hour", "yearweek", "week"), row.names = c(NA, >> 10L), class = "data.frame") > > > Sincerely, > > Shouro > > [[alternative HTML version deleted]] > > ______________________________________________ > R-help at r-project.org mailing list -- To UNSUBSCRIBE and more, see > https://stat.ethz.ch/mailman/listinfo/r-help > PLEASE do read the posting guide http://www.R-project.org/posting-guide.html > and provide commented, minimal, self-contained, reproducible code.
On Sep 10, 2015, at 3:28 PM, Shouro Dasgupta wrote: > Dear all, > > I have 3-hourly temperature data from 1970-2010 for 122 cities in the US. I > would like to bin this data by city-year-week. My idea is if the > temperature for a particular city in a given week falls within a given > range (-17.78 & -12.22), (-12.22 & -6.67), ... (37.78 & 43.33), then the > corresponding bin would have a value of 1 and 0 otherwise. > > The data looks like this. Basically, I need to generate a dummy variable > for each temperature range. Any help will be greatly appreciated.

The urge to imitate other statistical packages that rely on a profusion of dummies should be resisted. R repression functions can handle factor variables, and the `cut` function can deliver them along with appropriate use of `seq`:

tmp2$Tcat <- cut( tmp2$avsft, breaks=seq (-17.78, 43.33, by= 5.55 ) )
> tmp2$Tcat
 [1] (-12.2,-6.68] (-17.8,-12.2] (-12.2,-6.68] (-6.68,-1.13]
 [5] (-1.13,4.42]  (4.42,9.97]   (-6.68,-1.13] (4.42,9.97]
 [9] (9.97,15.5]   (-1.13,4.42]
11 Levels: (-17.8,-12.2] (-12.2,-6.68] ... (37.7,43.3]
> tmp2[ , c("City", "Tcat")]
          City          Tcat
1        AKRON (-12.2,-6.68]
2       ALBANY (-17.8,-12.2]
3  ALBUQUERQUE (-12.2,-6.68]
4    ALLENTOWN (-6.68,-1.13]
5      ATLANTA  (-1.13,4.42]
6       AUSTIN   (4.42,9.97]
7    BALTIMORE (-6.68,-1.13]
8  BATON ROUGE   (4.42,9.97]
9     BERKELEY   (9.97,15.5]
10  BIRMINGHAM  (-1.13,4.42]

Must have been a cold snap in the southeast that New Year's Day. There.... isn't that much neater than having a messy bunch of dummies? If you really need to build them then look at `?model.frame`.
-- David.> > tmp2<- dput(head(tmp1,10)) >> structure(list(yearday = c(1970001L, 1970001L, 1970001L, 1970001L, >> 1970001L, 1970001L, 1970001L, 1970001L, 1970001L, 1970001L), >> City = structure(1:10, .Label = c("AKRON", "ALBANY", "ALBUQUERQUE", >> "ALLENTOWN", "ATLANTA", "AUSTIN", "BALTIMORE", "BATON ROUGE", >> "BERKELEY", "BIRMINGHAM", "BOISE", "BOSTON", "BRIDGEPORT", >> "BUFFALO", "CAMBRIDGE", "CAMDEN", "CANTON", "CHARLOTTE", >> "CHATTANOOGA", "CHICAGO", "CINCINNATI", "CLEVELAND", "COLORADO >> SPRINGS", >> "COLUMBUS", "CORPUS CHRISTI", "DALLAS", "DAYTON", "DENVER", >> "DES MOINES", "DETROIT", "DULUTH", "EL PASO", "ELIZABETH", >> "ERIE", "EVANSVILLE", "FALL RIVER", "FLINT", "FORT WAYNE", >> "FRESNO", "FT WORTH", "GARY", "GLENDALE", "GRAND RAPIDS", >> "HARTFORD", "HONOLULU", "HOUSTON", "INDIANAPOLIS", "JACKSONVILLE", >> "JERSEY CITY", "KANSAS CITY", "KANSAS ITY", "KNOXVILLE", >> "Lansing ", "LAS VEGAS", "LEXINGTON", "LINCOLN", "LITTLE ROCK", >> "LONG BEACH", "LOS ANGELES", "LOUISVILLE", "LOWELL", "LYNN", >> "MADISON", "MEMPHIS", "MIAMI", "MILWAUKEE", "MINNEAPOLIS", >> "MOBILE", "MONTGOMERY", "NASHVILLE", "NEW BEDFORD", "NEW HAVEN", >> "NEW ORLEANS", "NEW YORK CITY", "NEWARK", "NORFOLK", "OAKLAND", >> "OGDEN", "OKLAHOMA CITY", "OMAHA", "PASADENA", "PATERSON", >> "PEORIA", "PHILADELPHIA", "PHOENIX", "PITTSBURG", "PORTLAND", >> "PROVIDENCE", "PUEBLO", "READING", "RICHMOND", "ROCHESTER", >> "ROCKFORD", "SACRAMENTO", "SALT LAKE CITY", "SAN ANTONIO", >> "SAN CRUZ", "SAN DIEGO", "SAN FRANCISCO", "SAN JOSE", "SAVANNAH", >> "SCHENECTADY", "SCRANTON", "SEATTLE", "SHREVEPORT", "SOMERVILLE", >> "SOUTH BEND", "SPOKANE", "SPRINGFIELD", "ST LOUIS", "ST PAUL", >> "ST PETERSBURG", "SYRACUSE", "TACOMA", "TAMPA", "TOLEDO", >> "TRENTON", "TUCSON", "TULSA", "UTICA", "WASHINGTON", "WATERBURY", >> "WICHITA", "WILMINGTON", "WORCESTER", "YONKERS", "YOUNGSTOWN" >> ), class = "factor"), cell_number = c(17379L, 17027L, 19514L, >> 17745L, 20256L, 21323L, 18104L, 21329L, 18779L, 20254L), >> 
longitude = c(-81.519005, -73.756232, -106.609991, -75.490183, >> -84.387982, -97.743061, -76.612189, -91.14032, -121.635963, >> -86.80249), latitude = c(41.081445, 42.652579, 35.110703, >> 40.608431, 33.748995, 30.267153, 39.290385, 30.458283, 37.871744, >> 33.520661), State = structure(c(29L, 28L, 27L, 32L, 10L, >> 35L, 19L, 17L, 4L, 1L), .Label = c(" ALA", " ARIZ", " ARK", >> " CAL", " COLO", " CONN", " DC", " DEL", " FLA", " GA", " HAWAII", >> " ILL", " IND", " IOWA", " KANS", " KY", " LA", " MASS", >> " MD", " MICH", " MINN", " MO", " NC", " NEBR", " NEV", " NJ", >> " NM", " NY", " OHIO", " OKLA", " ORE", " PA", " RI", " TENN", >> " TEX", " UTAH", " VA", " WASH", " WIS", "CAL", "CONN", "IDAH", >> "KY", "MASS"), class = "factor"), avsft = c(-7.81, -16.06, >> -7.71999999999997, -1.88999999999999, 2.90000000000003, 5.12, >> -5.02999999999997, 9.33000000000004, 15.08, 2.89000000000004 >> ), year = c(1970L, 1970L, 1970L, 1970L, 1970L, 1970L, 1970L, >> 1970L, 1970L, 1970L), day = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, >> 1L, 1L, 1L), hour = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, >> 0L), yearweek = c(197001L, 197001L, 197001L, 197001L, 197001L, >> 197001L, 197001L, 197001L, 197001L, 197001L), week = c(1L, >> 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L)), .Names = c("yearday", >> "City", "cell_number", "longitude", "latitude", "State", "avsft", >> "year", "day", "hour", "yearweek", "week"), row.names = c(NA, >> 10L), class = "data.frame") > > > Sincerely, > > Shouro > > [[alternative HTML version deleted]] > > ______________________________________________ > R-help at r-project.org mailing list -- To UNSUBSCRIBE and more, see > https://stat.ethz.ch/mailman/listinfo/r-help > PLEASE do read the posting guide http://www.R-project.org/posting-guide.html > and provide commented, minimal, self-contained, reproducible code.David Winsemius Alameda, CA, USA
On 11/09/15 11:57, David Winsemius wrote: <SNIP>> The urge to imitate other statistical package that rely on profusion > of dummies should be resisted. R repression functions can handle > factor variables ....<SNIP> Fortune? :-) cheers, Rolf -- Technical Editor ANZJS Department of Statistics University of Auckland Phone: +64-9-373-7599 ext. 88276
Apologies for the HTML. It shouldn't have happened. I would like to use the dummies as independent variables in a regression. I did manage to get a count of observations in a given range using the following code:

for (i in filelist) {
  # i <- filelist[1]
  tmp1 <- as.data.table(read.csv(i, sep=","))
  year<-tmp1$year[1]
  mykey=c("City","year","week")
  output <- as.data.frame(tmp1[,sum(avsft< -0),by=mykey])[,1:length(mykey)]
  output$avsft_1<- as.data.frame(tmp1[,sum(avsft>= -17.78 & avsft< -12.22, na.rm=T), by=mykey])[,length(mykey)+1]

where "i" is a file name (each file has data for one year). But instead of a count, I would like to generate dummy variables for the ranges [(-17.78 & -12.22), (-12.22 & -6.67), ... (37.78 & 43.33)], so that if a temperature observation falls within a given range, the dummy variable for that range will have a value of 1 for that week. Thanks again!

tmp2<- dput(head(tmp1,10)) structure(list(yearday = c(1970001L, 1970001L, 1970001L, 1970001L, 1970001L, 1970001L, 1970001L, 1970001L, 1970001L, 1970001L), City = structure(1:10, .Label = c("AKRON", "ALBANY", "ALBUQUERQUE", "ALLENTOWN", "ATLANTA", "AUSTIN", "BALTIMORE", "BATON ROUGE", "BERKELEY", "BIRMINGHAM", "BOISE", "BOSTON", "BRIDGEPORT", "BUFFALO", "CAMBRIDGE", "CAMDEN", "CANTON", "CHARLOTTE", "CHATTANOOGA", "CHICAGO", "CINCINNATI", "CLEVELAND", "COLORADO SPRINGS", "COLUMBUS", "CORPUS CHRISTI", "DALLAS", "DAYTON", "DENVER", "DES MOINES", "DETROIT", "DULUTH", "EL PASO", "ELIZABETH", "ERIE", "EVANSVILLE", "FALL RIVER", "FLINT", "FORT WAYNE", "FRESNO", "FT WORTH", "GARY", "GLENDALE", "GRAND RAPIDS", "HARTFORD", "HONOLULU", "HOUSTON", "INDIANAPOLIS", "JACKSONVILLE", "JERSEY CITY", "KANSAS CITY", "KANSAS ITY", "KNOXVILLE", "Lansing ", "LAS VEGAS", "LEXINGTON", "LINCOLN", "LITTLE ROCK", "LONG BEACH", "LOS ANGELES", "LOUISVILLE", "LOWELL", "LYNN", "MADISON", "MEMPHIS", "MIAMI", "MILWAUKEE", "MINNEAPOLIS", "MOBILE", "MONTGOMERY", "NASHVILLE", "NEW BEDFORD", "NEW HAVEN", "NEW ORLEANS", "NEW YORK CITY", "NEWARK", 
"NORFOLK", "OAKLAND", "OGDEN", "OKLAHOMA CITY", "OMAHA", "PASADENA", "PATERSON", "PEORIA", "PHILADELPHIA", "PHOENIX", "PITTSBURG", "PORTLAND", "PROVIDENCE", "PUEBLO", "READING", "RICHMOND", "ROCHESTER", "ROCKFORD", "SACRAMENTO", "SALT LAKE CITY", "SAN ANTONIO", "SAN CRUZ", "SAN DIEGO", "SAN FRANCISCO", "SAN JOSE", "SAVANNAH", "SCHENECTADY", "SCRANTON", "SEATTLE", "SHREVEPORT", "SOMERVILLE", "SOUTH BEND", "SPOKANE", "SPRINGFIELD", "ST LOUIS", "ST PAUL", "ST PETERSBURG", "SYRACUSE", "TACOMA", "TAMPA", "TOLEDO", "TRENTON", "TUCSON", "TULSA", "UTICA", "WASHINGTON", "WATERBURY", "WICHITA", "WILMINGTON", "WORCESTER", "YONKERS", "YOUNGSTOWN" ), class = "factor"), cell_number = c(17379L, 17027L, 19514L, 17745L, 20256L, 21323L, 18104L, 21329L, 18779L, 20254L), longitude = c(-81.519005, -73.756232, -106.609991, -75.490183, -84.387982, -97.743061, -76.612189, -91.14032, -121.635963, -86.80249), latitude = c(41.081445, 42.652579, 35.110703, 40.608431, 33.748995, 30.267153, 39.290385, 30.458283, 37.871744, 33.520661), State = structure(c(29L, 28L, 27L, 32L, 10L, 35L, 19L, 17L, 4L, 1L), .Label = c(" ALA", " ARIZ", " ARK", " CAL", " COLO", " CONN", " DC", " DEL", " FLA", " GA", " HAWAII", " ILL", " IND", " IOWA", " KANS", " KY", " LA", " MASS", " MD", " MICH", " MINN", " MO", " NC", " NEBR", " NEV", " NJ", " NM", " NY", " OHIO", " OKLA", " ORE", " PA", " RI", " TENN", " TEX", " UTAH", " VA", " WASH", " WIS", "CAL", "CONN", "IDAH", "KY", "MASS"), class = "factor"), avsft = c(-7.81, -16.06, -7.71999999999997, -1.88999999999999, 2.90000000000003, 5.12, -5.02999999999997, 9.33000000000004, 15.08, 2.89000000000004 ), year = c(1970L, 1970L, 1970L, 1970L, 1970L, 1970L, 1970L, 1970L, 1970L, 1970L), day = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), hour = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), yearweek = c(197001L, 197001L, 197001L, 197001L, 197001L, 197001L, 197001L, 197001L, 197001L, 197001L), week = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L)), .Names = c("yearday", "City", 
"cell_number", "longitude", "latitude", "State", "avsft", "year", "day", "hour", "yearweek", "week"), row.names = c(NA, 10L), class = "data.frame") Sincerely, Shouro On Fri, Sep 11, 2015 at 12:33 AM, Bert Gunter <bgunter.4567 at gmail.com> wrote:> 1. Posting in HTML largely negated your ability to provide data > through dput(). Folow he posting guide and post in PLAIN TEXT only, > please. > > 2. See ?cut . I think this will at least get you started. > > Cheers, > Bert > Bert Gunter > > "Data is not information. Information is not knowledge. And knowledge > is certainly not wisdom." > -- Clifford Stoll > > > On Thu, Sep 10, 2015 at 3:28 PM, Shouro Dasgupta <shouro at gmail.com> wrote: > > Dear all, > > > > I have 3-hourly temperature data from 1970-2010 for 122 cities in the > US. I > > would like to bin this data by city-year-week. My idea is if the > > temperature for a particular city in a given week falls within a given > > range (-17.78 & -12.22), (-12.22 & -6.67), ... (37.78 & 43.33), then the > > corresponding bin would have a value of 1 and 0 otherwise. > > > > The data looks like this. Basically, I need to generate a dummy variable > > for each temperature range. Any help will be greatly appreciated. 
> > > > tmp2<- dput(head(tmp1,10)) > >> structure(list(yearday = c(1970001L, 1970001L, 1970001L, 1970001L, > >> 1970001L, 1970001L, 1970001L, 1970001L, 1970001L, 1970001L), > >> City = structure(1:10, .Label = c("AKRON", "ALBANY", "ALBUQUERQUE", > >> "ALLENTOWN", "ATLANTA", "AUSTIN", "BALTIMORE", "BATON ROUGE", > >> "BERKELEY", "BIRMINGHAM", "BOISE", "BOSTON", "BRIDGEPORT", > >> "BUFFALO", "CAMBRIDGE", "CAMDEN", "CANTON", "CHARLOTTE", > >> "CHATTANOOGA", "CHICAGO", "CINCINNATI", "CLEVELAND", "COLORADO > >> SPRINGS", > >> "COLUMBUS", "CORPUS CHRISTI", "DALLAS", "DAYTON", "DENVER", > >> "DES MOINES", "DETROIT", "DULUTH", "EL PASO", "ELIZABETH", > >> "ERIE", "EVANSVILLE", "FALL RIVER", "FLINT", "FORT WAYNE", > >> "FRESNO", "FT WORTH", "GARY", "GLENDALE", "GRAND RAPIDS", > >> "HARTFORD", "HONOLULU", "HOUSTON", "INDIANAPOLIS", "JACKSONVILLE", > >> "JERSEY CITY", "KANSAS CITY", "KANSAS ITY", "KNOXVILLE", > >> "Lansing ", "LAS VEGAS", "LEXINGTON", "LINCOLN", "LITTLE ROCK", > >> "LONG BEACH", "LOS ANGELES", "LOUISVILLE", "LOWELL", "LYNN", > >> "MADISON", "MEMPHIS", "MIAMI", "MILWAUKEE", "MINNEAPOLIS", > >> "MOBILE", "MONTGOMERY", "NASHVILLE", "NEW BEDFORD", "NEW HAVEN", > >> "NEW ORLEANS", "NEW YORK CITY", "NEWARK", "NORFOLK", "OAKLAND", > >> "OGDEN", "OKLAHOMA CITY", "OMAHA", "PASADENA", "PATERSON", > >> "PEORIA", "PHILADELPHIA", "PHOENIX", "PITTSBURG", "PORTLAND", > >> "PROVIDENCE", "PUEBLO", "READING", "RICHMOND", "ROCHESTER", > >> "ROCKFORD", "SACRAMENTO", "SALT LAKE CITY", "SAN ANTONIO", > >> "SAN CRUZ", "SAN DIEGO", "SAN FRANCISCO", "SAN JOSE", "SAVANNAH", > >> "SCHENECTADY", "SCRANTON", "SEATTLE", "SHREVEPORT", "SOMERVILLE", > >> "SOUTH BEND", "SPOKANE", "SPRINGFIELD", "ST LOUIS", "ST PAUL", > >> "ST PETERSBURG", "SYRACUSE", "TACOMA", "TAMPA", "TOLEDO", > >> "TRENTON", "TUCSON", "TULSA", "UTICA", "WASHINGTON", "WATERBURY", > >> "WICHITA", "WILMINGTON", "WORCESTER", "YONKERS", "YOUNGSTOWN" > >> ), class = "factor"), cell_number = c(17379L, 17027L, 19514L, > >> 
17745L, 20256L, 21323L, 18104L, 21329L, 18779L, 20254L), > >> longitude = c(-81.519005, -73.756232, -106.609991, -75.490183, > >> -84.387982, -97.743061, -76.612189, -91.14032, -121.635963, > >> -86.80249), latitude = c(41.081445, 42.652579, 35.110703, > >> 40.608431, 33.748995, 30.267153, 39.290385, 30.458283, 37.871744, > >> 33.520661), State = structure(c(29L, 28L, 27L, 32L, 10L, > >> 35L, 19L, 17L, 4L, 1L), .Label = c(" ALA", " ARIZ", " ARK", > >> " CAL", " COLO", " CONN", " DC", " DEL", " FLA", " GA", " HAWAII", > >> " ILL", " IND", " IOWA", " KANS", " KY", " LA", " MASS", > >> " MD", " MICH", " MINN", " MO", " NC", " NEBR", " NEV", " NJ", > >> " NM", " NY", " OHIO", " OKLA", " ORE", " PA", " RI", " TENN", > >> " TEX", " UTAH", " VA", " WASH", " WIS", "CAL", "CONN", "IDAH", > >> "KY", "MASS"), class = "factor"), avsft = c(-7.81, -16.06, > >> -7.71999999999997, -1.88999999999999, 2.90000000000003, 5.12, > >> -5.02999999999997, 9.33000000000004, 15.08, 2.89000000000004 > >> ), year = c(1970L, 1970L, 1970L, 1970L, 1970L, 1970L, 1970L, > >> 1970L, 1970L, 1970L), day = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, > >> 1L, 1L, 1L), hour = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, > >> 0L), yearweek = c(197001L, 197001L, 197001L, 197001L, 197001L, > >> 197001L, 197001L, 197001L, 197001L, 197001L), week = c(1L, > >> 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L)), .Names = c("yearday", > >> "City", "cell_number", "longitude", "latitude", "State", "avsft", > >> "year", "day", "hour", "yearweek", "week"), row.names = c(NA, > >> 10L), class = "data.frame") > > > > > > Sincerely, > > > > Shouro > > > > [[alternative HTML version deleted]] > > > > ______________________________________________ > > R-help at r-project.org mailing list -- To UNSUBSCRIBE and more, see > > https://stat.ethz.ch/mailman/listinfo/r-help > > PLEASE do read the posting guide > http://www.R-project.org/posting-guide.html > > and provide commented, minimal, self-contained, reproducible code. >[[alternative HTML version deleted]]