@comment{{Prefer citations from https://dl.acm.org/, if available. DOI as citekey, if available.}}
@inproceedings{10.1145/318898.318923,
  author    = {Copeland, George P. and Khoshafian, Setrag N.},
  title     = {A Decomposition Storage Model},
  booktitle = {Proceedings of the 1985 ACM SIGMOD International Conference on Management of Data},
  series    = {SIGMOD '85},
  year      = {1985},
  isbn      = {0897911601},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  location  = {Austin, Texas, USA},
  pages     = {268--279},
  numpages  = {12},
  url       = {https://dl.acm.org/doi/pdf/10.1145/318898.318923},
  doi       = {10.1145/318898.318923}
}
@inproceedings{10.1145/800083.802685,
  author    = {Copeland, George},
  title     = {What If Mass Storage Were Free?},
  booktitle = {Proceedings of the Fifth Workshop on Computer Architecture for Non-Numeric Processing},
  series    = {CAW '80},
  year      = {1980},
  isbn      = {9781450373951},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  location  = {Pacific Grove, California, USA},
  pages     = {1--7},
  numpages  = {7},
  url       = {https://dl.acm.org/doi/pdf/10.1145/800083.802685},
  doi       = {10.1145/800083.802685},
  abstract  = {This paper investigates how database systems would be designed and used under the limiting-case assumption that mass storage is free. It is argued that free mass storage would free database systems from the limitations and problems caused by conventional deletion techniques. A non-deletion strategy would significantly simplify database systems and their operation, as well as increase their functionality and availability. Consideration of this limiting case helps shed light on a more realistic argument: if the cost of mass storage were low enough, then deletion would become undesirable. It is also argued that the often labor-intensive costs and time delays involved in archival and retrieval of older data can be minimized if a single technology were available with low-cost on-line storage and a low-cost archival media with long shelf life. Optical discs promise to come one to two orders of magnitude closer to the limiting case of free mass storage than ever before. Other features of optical discs include improved reliability and a single technology for both on-line and archival storage with a long shelf life. Because of these features and because of (not in spite of) their non-deletion limitation, it is argued that optical discs fit the requirements of database systems better than magnetic discs and tapes.}
}
@article{10.1145/971697.602300,
  author     = {Copeland, George and Maier, David},
  title      = {Making {Smalltalk} a Database System},
  journal    = {SIGMOD Rec.},
  year       = {1984},
  issue_date = {June 1984},
  volume     = {14},
  number     = {2},
  month      = jun,
  issn       = {0163-5808},
  publisher  = {Association for Computing Machinery},
  address    = {New York, NY, USA},
  pages      = {316--325},
  numpages   = {10},
  url        = {https://dl.acm.org/doi/pdf/10.1145/971697.602300},
  doi        = {10.1145/971697.602300},
  abstract   = {To overcome limitations in the modeling power of existing database systems and provide a better tool for database application programming, Servio Logic Corporation is developing a computer system to support a set-theoretic data model in an object-oriented programming environment. We recount the problems with existing models and database systems. We then show how features of Smalltalk, such as operational semantics, its type hierarchy, entity identity and the merging of programming and data language, solve many of those problems. Next we consider what Smalltalk lacks as a database system: secondary storage management, a declarative semantics, concurrency, past states. To address these shortcomings, we needed a formal data model. We introduce the GemStone data model, and show how it helps to define path expressions, a declarative semantics and object history in the OPAL language. We summarize similar approaches, and give a brief overview of the GemStone system implementation.}
}
@article{10.1109/69.755613,
  author     = {Jensen, Christian S. and Snodgrass, Richard Thomas},
  title      = {Temporal Data Management},
  journal    = {IEEE Transactions on Knowledge and Data Engineering},
  year       = {1999},
  issue_date = {January 1999},
  volume     = {11},
  number     = {1},
  month      = jan,
  issn       = {1041-4347},
  publisher  = {IEEE Educational Activities Department},
  address    = {USA},
  pages      = {36--44},
  numpages   = {9},
  url        = {http://www2.cs.arizona.edu/~rts/pubs/TKDEJan99.pdf},
  doi        = {10.1109/69.755613},
  keywords   = {temporal database, SQL, time-constrained database, Query language, TSQL2, transaction time, valid time, temporal data model, user-defined time},
  abstract   = {A wide range of database applications manage time-varying information. Existing database technology currently provides little support for managing such data. The research area of temporal databases has made important contributions in characterizing the semantics of such information and in providing expressive and efficient means to model, store, and query temporal data. This paper introduces the reader to temporal data management, surveys state-of-the-art solutions to challenging aspects of temporal data management, and points to research directions.}
}
@book{10.5555/320037,
  author    = {Snodgrass, Richard Thomas},
  title     = {Developing Time-Oriented Database Applications in {SQL}},
  year      = {1999},
  isbn      = {1558604367},
  publisher = {Morgan Kaufmann Publishers Inc.},
  address   = {San Francisco, CA, USA},
  url       = {http://www2.cs.arizona.edu/~rts/tdbbook.pdf},
  doi       = {10.5555/320037}
}
@article{10.1145/3180143,
  author     = {Ngo, Hung Q. and Porat, Ely and R\'{e}, Christopher and Rudra, Atri},
  title      = {Worst-Case Optimal Join Algorithms},
  journal    = {J. ACM},
  year       = {2018},
  issue_date = {June 2018},
  volume     = {65},
  number     = {3},
  month      = mar,
  articleno  = {16},
  numpages   = {40},
  issn       = {0004-5411},
  publisher  = {Association for Computing Machinery},
  address    = {New York, NY, USA},
  url        = {https://www.cs.stanford.edu/people/chrismre/papers/paper49.Ngo.pdf},
  doi        = {10.1145/3180143},
  keywords   = {Join Algorithms, fractional cover bound, Bollob\'{a}s-Thomason inequality, Loomis-Whitney inequality},
  abstract   = {Efficient join processing is one of the most fundamental and well-studied tasks in database research. In this work, we examine algorithms for natural join queries over many relations and describe a new algorithm to process these queries optimally in terms of worst-case data complexity. Our result builds on recent work by Atserias, Grohe, and Marx, who gave bounds on the size of a natural join query in terms of the sizes of the individual relations in the body of the query. These bounds, however, are not constructive: they rely on Shearer’s entropy inequality, which is information-theoretic. Thus, the previous results leave open the question of whether there exist algorithms whose runtimes achieve these optimal bounds. An answer to this question may be interesting to database practice, as we show in this article that any project-join style plans, such as ones typically employed in a relational database management system, are asymptotically slower than the optimal for some queries. We present an algorithm whose runtime is worst-case optimal for all natural join queries. Our result may be of independent interest, as our algorithm also yields a constructive proof of the general fractional cover bound by Atserias, Grohe, and Marx without using Shearer’s inequality. This bound implies two famous inequalities in geometry: the Loomis-Whitney inequality and its generalization, the Bollob\'{a}s-Thomason inequality. Hence, our results algorithmically prove these inequalities as well. Finally, we discuss how our algorithm can be used to evaluate full conjunctive queries optimally, to compute a relaxed notion of joins and to optimally (in the worst-case) enumerate all induced copies of a fixed subgraph inside of a given large graph.}
}
@article{10.14778/3436905.3436913,
  author     = {Li, Tianyu and Butrovich, Matthew and Ngom, Amadou and Lim, Wan Shen and McKinney, Wes and Pavlo, Andrew},
  title      = {Mainlining Databases: Supporting Fast Transactional Workloads on Universal Columnar Data File Formats},
  journal    = {Proceedings of the VLDB Endowment},
  year       = {2021},
  issue_date = {December 2020},
  volume     = {14},
  number     = {4},
  month      = feb,
  issn       = {2150-8097},
  publisher  = {VLDB Endowment},
  pages      = {534--546},
  numpages   = {13},
  url        = {https://db.cs.cmu.edu/papers/2020/p534-li.pdf},
  doi        = {10.14778/3436905.3436913},
  abstract   = {The proliferation of modern data processing tools has given rise to open-source columnar data formats. These formats help organizations avoid repeated conversion of data to a new format for each application. However, these formats are read-only, and organizations must use a heavy-weight transformation process to load data from on-line transactional processing (OLTP) systems. As a result, DBMSs often fail to take advantage of full network bandwidth when transferring data. We aim to reduce or even eliminate this overhead by developing a storage architecture for in-memory database management systems (DBMSs) that is aware of the eventual usage of its data and emits columnar storage blocks in a universal open-source format. We introduce relaxations to common analytical data formats to efficiently update records and rely on a lightweight transformation process to convert blocks to a read-optimized layout when they are cold. We also describe how to access data from third-party analytical tools with minimal serialization overhead. We implemented our storage engine based on the Apache Arrow format and integrated it into the NoisePage DBMS to evaluate our work. Our experiments show that our approach achieves comparable performance with dedicated OLTP DBMSs while enabling orders-of-magnitude faster data exports to external data science and machine learning tools than existing methods.}
}
@inproceedings{CIDR-BonczMonetDBX100HyperPipeliningQueryExecution,
  author    = {Boncz, Peter A. and Zukowski, Marcin and Nes, Niels},
  title     = {{MonetDB/X100}: Hyper-Pipelining Query Execution},
  booktitle = {Second Biennial Conference on Innovative Data Systems Research, {CIDR} 2005, Asilomar, CA, USA, January 4-7, 2005, Online Proceedings},
  year      = {2005},
  pages     = {225--237},
  publisher = {www.cidrdb.org},
  url       = {http://cidrdb.org/cidr2005/papers/P19.pdf},
  timestamp = {Mon, 18 Jul 2022 17:13:00 +0200},
  biburl    = {https://dblp.org/rec/conf/cidr/BonczZN05.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{10.1007/s00778-002-0074-9,
  author     = {Ailamaki, Anastassia and DeWitt, David J. and Hill, Mark D.},
  title      = {Data Page Layouts for Relational Databases on Deep Memory Hierarchies},
  journal    = {The VLDB Journal},
  year       = {2002},
  issue_date = {November 2002},
  volume     = {11},
  number     = {3},
  month      = nov,
  issn       = {1066-8888},
  publisher  = {Springer-Verlag},
  address    = {Berlin, Heidelberg},
  pages      = {198--215},
  numpages   = {18},
  url        = {https://research.cs.wisc.edu/multifacet/papers/vldbj02_pax.pdf},
  doi        = {10.1007/s00778-002-0074-9},
  keywords   = {Disk page layout, Relational data placement, Cache-conscious database systems},
  abstract   = {Relational database systems have traditionally optimized for I/O performance and organized records sequentially on disk pages using the N-ary Storage Model (NSM) (a.k.a., slotted pages). Recent research, however, indicates that cache utilization and performance is becoming increasingly important on modern platforms. In this paper, we first demonstrate that in-page data placement is the key to high cache performance and that NSM exhibits low cache utilization on modern platforms. Next, we propose a new data organization model called PAX (Partition Attributes Across), that significantly improves cache performance by grouping together all values of each attribute within each page. Because PAX only affects layout inside the pages, it incurs no storage penalty and does not affect I/O behavior. According to our experimental results (which were obtained without using any indices on the participating relations), when compared to NSM: (a) PAX exhibits superior cache and memory bandwidth utilization, saving at least 75\% of NSM's stall time due to data cache accesses; (b) range selection queries and updates on memory-resident relations execute 17--25\% faster; and (c) TPC-H queries involving I/O execute 11--48\% faster. Finally, we show that PAX performs well across different memory system designs.}
}
@techreport{ISO/IEC-9075-2:2016,
  author      = {{ISO/IEC 9075-2:2016}},
  title       = {Information technology — Database languages — {SQL} — Part 2: Foundation {(SQL/Foundation)}},
  institution = {ISO/IEC},
  type        = {Standard},
  year        = {2016},
  month       = dec,
  note        = {https://www.iso.org/standard/63556.html},
  url         = {https://www.iso.org/standard/63556.html}
}
@techreport{ISO/IEC-19075-2:2021,
  author      = {{ISO/IEC 19075-2:2021}},
  title       = {Information technology — Guidance for the use of database language {SQL} — Part 2: Time-related information},
  institution = {ISO/IEC},
  type        = {Standard},
  year        = {2021},
  month       = aug,
  note        = {https://www.iso.org/standard/78933.html},
  url         = {https://www.iso.org/standard/78933.html}
}
@techreport{ISO/IEC-19075-6:2021,
  author      = {{ISO/IEC 19075-6:2021}},
  title       = {Information technology — Guidance for the use of database language {SQL} — Part 6: Support for {JSON}},
  institution = {ISO/IEC},
  type        = {Standard},
  year        = {2021},
  month       = aug,
  note        = {https://www.iso.org/standard/78937.html},
  url         = {https://www.iso.org/standard/78937.html}
}
@inproceedings{10.1145/3318464.3380579,
  author    = {Nathan, Vikram and Ding, Jialin and Alizadeh, Mohammad and Kraska, Tim},
  title     = {Learning Multi-Dimensional Indexes},
  booktitle = {Proceedings of the 2020 ACM SIGMOD International Conference on Management of Data},
  series    = {SIGMOD '20},
  year      = {2020},
  isbn      = {9781450367356},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  location  = {Portland, OR, USA},
  pages     = {985--1000},
  numpages  = {16},
  url       = {https://arxiv.org/pdf/1912.01668.pdf},
  doi       = {10.1145/3318464.3380579},
  keywords  = {in-memory, multi-dimensional, indexing, databases, primary index},
  abstract  = {Scanning and filtering over multi-dimensional tables are key operations in modern analytical database engines. To optimize the performance of these operations, databases often create clustered indexes over a single dimension or multi-dimensional indexes such as R-Trees, or use complex sort orders (e.g., Z-ordering). However, these schemes are often hard to tune and their performance is inconsistent across different datasets and queries. In this paper, we introduce Flood, a multi-dimensional in-memory read-optimized index that automatically adapts itself to a particular dataset and workload by jointly optimizing the index structure and data storage layout. Flood achieves up to three orders of magnitude faster performance for range scans with predicates than state-of-the-art multi-dimensional indexes or sort orders on real-world datasets and workloads. Our work serves as a building block towards an end-to-end learned database system.}
}
@misc{YouTube-Raberg-YjAVsvYGbuU,
  author   = {Råberg, Håkan},
  title    = {The Design and Implementation of a Bitemporal {DBMS}},
  series   = {ClojuTRE 2019},
  location = {Helsinki, Finland},
  year     = {2019},
  month    = sep,
  url      = {https://www.youtube.com/watch?v=YjAVsvYGbuU},
  keywords = {temporal, bitemporal, z-curves}
}
@misc{YouTube-Raberg-Px-7TlceM5A,
  author   = {Råberg, Håkan},
  title    = {Light and Adaptive Indexing for Immutable Databases},
  series   = {Strange Loop 2022},
  location = {St. Louis, MO, USA},
  year     = {2022},
  month    = sep,
  url      = {https://www.youtube.com/watch?v=Px-7TlceM5A},
  keywords = {machine learning, adaptive indexes, databases, indexing, separation of storage from compute}
}
@inproceedings{CIDR-IdreosDatabaseCracking,
  author    = {Idreos, Stratos and Kersten, Martin and Manegold, Stefan},
  title     = {Database Cracking},
  booktitle = {Conference on Innovative Data Systems Research},
  year      = {2007},
  month     = jan,
  url       = {https://stratos.seas.harvard.edu/files/IKM_CIDR07.pdf}
}
@inproceedings{10.1145/800296.811515,
  author    = {Chamberlin, Donald D. and Boyce, Raymond F.},
  title     = {{SEQUEL}: A Structured {English} Query Language},
  booktitle = {Proceedings of the 1974 ACM SIGFIDET (Now SIGMOD) Workshop on Data Description, Access and Control},
  series    = {SIGFIDET '74},
  year      = {1974},
  isbn      = {9781450374156},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  location  = {Ann Arbor, Michigan},
  pages     = {249--264},
  numpages  = {16},
  url       = {https://dl.acm.org/doi/pdf/10.1145/800296.811515},
  doi       = {10.1145/800296.811515},
  keywords  = {Information Retrieval, Data Base Management Systems, Query Languages, Data Manipulation Languages},
  abstract  = {In this paper we present the data manipulation facility for a structured English query language (SEQUEL) which can be used for accessing data in an integrated relational data base. Without resorting to the concepts of bound variables and quantifiers SEQUEL identifies a set of simple operations on tabular structures, which can be shown to be of equivalent power to the first order predicate calculus. A SEQUEL user is presented with a consistent set of keyword English templates which reflect how people use tables to obtain information. Moreover, the SEQUEL user is able to compose these basic templates in a structured manner in order to form more complex queries. SEQUEL is intended as a data base sublanguage for both the professional programmer and the more infrequent data base user.}
}
@article{10.1145/362384.362685,
  author     = {Codd, E. F.},
  title      = {A Relational Model of Data for Large Shared Data Banks},
  journal    = {Commun. ACM},
  year       = {1970},
  issue_date = {June 1970},
  volume     = {13},
  number     = {6},
  month      = jun,
  issn       = {0001-0782},
  publisher  = {Association for Computing Machinery},
  address    = {New York, NY, USA},
  pages      = {377--387},
  numpages   = {11},
  url        = {https://dl.acm.org/doi/pdf/10.1145/362384.362685},
  doi        = {10.1145/362384.362685},
  keywords   = {composition, data base, redundancy, data structure, data bank, predicate calculus, retrieval language, relations, hierarchies of data, data organization, data integrity, consistency, networks of data, security, derivability, join},
  abstract   = {Future users of large data banks must be protected from having to know how the data is organized in the machine (the internal representation). A prompting service which supplies such information is not a satisfactory solution. Activities of users at terminals and most application programs should remain unaffected when the internal representation of data is changed and even when some aspects of the external representation are changed. Changes in data representation will often be needed as a result of changes in query, update, and report traffic and natural growth in the types of stored information. Existing noninferential, formatted data systems provide users with tree-structured files or slightly more general network models of the data. In Section 1, inadequacies of these models are discussed. A model based on n-ary relations, a normal form for data base relations, and the concept of a universal data sublanguage are introduced. In Section 2, certain operations on relations (other than logical inference) are discussed and applied to the problems of redundancy and consistency in the user's model.}
}
@article{10.1145/262762.262770,
  author     = {McHugh, Jason and Abiteboul, Serge and Goldman, Roy and Quass, Dallas and Widom, Jennifer},
  title      = {Lore: A Database Management System for Semistructured Data},
  journal    = {SIGMOD Rec.},
  year       = {1997},
  issue_date = {Sept. 1997},
  volume     = {26},
  number     = {3},
  month      = sep,
  issn       = {0163-5808},
  publisher  = {Association for Computing Machinery},
  address    = {New York, NY, USA},
  pages      = {54--66},
  numpages   = {13},
  url        = {https://dl.acm.org/doi/pdf/10.1145/262762.262770},
  doi        = {10.1145/262762.262770},
  abstract   = {Lore (for Lightweight Object Repository) is a DBMS designed specifically for managing semistructured information. Implementing Lore has required rethinking all aspects of a DBMS, including storage management, indexing, query processing and optimization, and user interfaces. This paper provides an overview of these aspects of the Lore system, as well as other novel features such as dynamic structural summaries and seamless access to data from external sources.}
}
@inproceedings{10.1145/1376616.1376645,
  author    = {Brantner, Matthias and Florescu, Daniela and Graf, David and Kossmann, Donald and Kraska, Tim},
  title     = {Building a Database on {S3}},
  booktitle = {Proceedings of the 2008 ACM SIGMOD International Conference on Management of Data},
  series    = {SIGMOD '08},
  year      = {2008},
  isbn      = {9781605581026},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  location  = {Vancouver, Canada},
  pages     = {251--264},
  numpages  = {14},
  url       = {https://people.csail.mit.edu/kraska/pub/sigmod08-s3.pdf},
  doi       = {10.1145/1376616.1376645},
  keywords  = {ec2, database, simpledb, cost trade-off, aws, storage system, s3, concurrency, sqs, performance, cloud computing, eventual consistency},
  abstract  = {There has been a great deal of hype about Amazon's simple storage service (S3). S3 provides infinite scalability and high availability at low cost. Currently, S3 is used mostly to store multi-media documents (videos, photos, audio) which are shared by a community of people and rarely updated. The purpose of this paper is to demonstrate the opportunities and limitations of using S3 as a storage system for general-purpose database applications which involve small objects and frequent updates. Read, write, and commit protocols are presented. Furthermore, the cost (\$), performance, and consistency properties of such a storage system are studied.}
}
@comment{{This file was generated by bibtex2html 1.99.}}