@Article{CloutierVilhuber2017,
author = {Julie Cloutier and Lars Vilhuber and Denis Harrisson and Vanessa B{\'e}land-Ouellette},
title = {Understanding the effect of procedural justice on psychological distress},
journal = {International Journal of Stress Management},
year = {2017},
note = {Forthcoming},
abstract = {Studies on the effect of procedural justice on psychological distress present conflicting results. Drawing on instrumental and relational perspectives of justice, we test the hypothesis that the perception of procedural justice influences the level of workers' psychological distress. Using a number of validated instruments to collect data from 659 workers in three call centers, we use OLS regressions and Hayes' PROCESS tool to show that the perception of procedural justice has a direct, unique, and independent effect on psychological distress. The perception of procedural justice has no instrumental role, the key mechanism being the relational role, suggesting that perceived injustice influences psychological distress because it threatens self-esteem. Distributive justice perceptions (recognition, promotions, job security) are not associated with psychological distress, calling into question Siegrist's model. Our findings suggest that perceived procedural justice provides workers better evidence of the extent to which they are valued and appreciated members of their organizations than do perceptions of distributive justice. The results highlight the greater need for workers to be valued and appreciated for who they are (consideration and esteem), rather than for what they do for their organization (distributive justice of rewards).},
owner = {vilhuber},
timestamp = {2016.11.22},
}

@InProceedings{HaneySIGMOD2017,
author = {Samuel Haney and Ashwin Machanavajjhala and John M. Abowd and Matthew Graham and Mark Kutzbach and Lars Vilhuber},
title = {Utility Cost of Formal Privacy for Releasing National Employer-Employee Statistics},
booktitle = {Proceedings of the 2017 International Conference on Management of Data},
year = {2017},
series = {SIGMOD '17},
publisher = {ACM},
note = {Forthcoming},
doi = {10.1145/3035918.3035940},
url = {https://doi.org/10.1145/3035918.3035940},
abstract = {National statistical agencies around the world publish tabular summaries based on combined employer-employee (ER-EE) data. The privacy of both individuals and business establishments that feature in these data are protected by law in most countries. These data are currently released using a variety of statistical disclosure limitation (SDL) techniques that do not reveal the exact characteristics of particular employers and employees, but lack provable privacy guarantees limiting inferential disclosures.
In this work, we present novel algorithms for releasing tabular summaries of linked ER-EE data with formal, provable guarantees of privacy. We show that state-of-the-art differentially private algorithms add too much noise for the output to be useful. Instead, we identify the privacy requirements mandated by current interpretations of the relevant laws, and formalize them using the Pufferfish framework. We then develop new privacy definitions that are customized to ER-EE data and satisfy the statutory privacy requirements. We implement the experiments in this paper on production data gathered by the U.S. Census Bureau. An empirical evaluation of utility for these data shows that for reasonable values of the privacy-loss parameter $\epsilon\geq$ 1, the additive error introduced by our provably private algorithms is comparable, and in some cases better, than the error introduced by existing SDL techniques that have no provable privacy guarantees. For some complex queries currently published, however, our algorithms do not have utility comparable to the existing traditional SDL algorithms. Those queries are fodder for future research.},
acmid = {3035940},
owner = {vilhuber},
timestamp = {2017.03.01},
}

@article{MirandaVilhuber:Using:SJIAOS:2016,
title = {Using partially synthetic microdata to protect sensitive cells in business statistics},
author = {Javier Miranda and Lars Vilhuber},
journal = {Statistical Journal of the International Association for Official Statistics},
year = {2016},
volume = {32},
number = {1},
pages = {69--80},
doi = {10.3233/SJI-160963},
abstract = {We describe and analyze a method that blends records from both observed and synthetic microdata into public-use tabulations on establishment statistics. The resulting tables use synthetic data only in potentially sensitive cells. We describe different algorithms, and present preliminary results when applied to the Census Bureau's Business Dynamics Statistics and Synthetic Longitudinal Business Database, highlighting accuracy and protection afforded by the method when compared to existing public-use tabulations (with suppressions).},
}

@article{VilhuberAbowdReiter:Synthetic:SJIAOS:2016,
title = {Synthetic establishment microdata around the world},
journal = {Statistical Journal of the International Association for Official Statistics},
author = {Lars Vilhuber and John M. Abowd and Jerome P. Reiter},
year = {2016},
volume = {32},
number = {1},
pages = {65--68},
doi = {10.3233/SJI-160964},
abstract = {In contrast to the many public-use microdata samples available for individual and household data from many statistical agencies around the world, there are virtually no establishment or firm microdata available. In large part, this difficulty in providing access to business micro data is due to the skewed and sparse distributions that characterize business data. Synthetic data are simulated data generated from statistical models. We organized sessions at the 2015 World Statistical Congress and the 2015 Joint Statistical Meetings, highlighting work on synthetic establishment microdata. This overview situates those papers, published in this issue, within the broader literature.},
}

@InCollection{psd2014b,
Title = {Synthetic Longitudinal Business Databases for International Comparisons},
Author = {Drechsler, J{\"o}rg and Vilhuber, Lars},
Booktitle = {Privacy in Statistical Databases},
Publisher = {Springer International Publishing},
Year = {2014},
Editor = {Domingo-Ferrer, Josep},
Pages = {243--252},
Series = {Lecture Notes in Computer Science},
Volume = {8744},
Abstract = {International comparison studies on economic activity are often hampered by the fact that access to business microdata is very limited on an international level. A recently launched project tries to overcome these limitations by improving access to Business Censuses from multiple countries based on synthetic data. Starting from the synthetic version of the longitudinally edited version of the U.S. Business Register (the Longitudinal Business Database, LBD), the idea is to create similar data products in other countries by applying the synthesis methodology developed for the LBD to generate synthetic replicates that could be distributed without confidentiality concerns. In this paper we present some first results of this project based on German business data collected at the Institute for Employment Research.},
DOI = {10.1007/978-3-319-11257-2_19},
ISBN = {978-3-319-11256-5},
Keywords = {business data; confidentiality; international comparison; multiple imputation; synthetic},
URL = {https://doi.org/10.1007/978-3-319-11257-2_19}
}

J. Drechsler and L. Vilhuber, “A First Step Towards A German SynLBD: Constructing A German Longitudinal Business Database,” Statistical Journal of the IAOS: Journal of the International Association for Official Statistics, vol. 30, 2014. [DOI][URL][Bibtex]

@Article{SJIAOS-2014b,
Title = {A First Step Towards a {German} {SynLBD}: Constructing a {German} {Longitudinal} {Business} {Database}},
Author = {J{\"o}rg Drechsler and Lars Vilhuber},
Journal = {Statistical Journal of the IAOS: Journal of the International Association for Official Statistics},
Year = {2014},
Volume = {30},
Abstract = {One major criticism against the use of synthetic data has been that the efforts necessary to generate useful synthetic data are so intense that many statistical agencies cannot afford them. We argue many lessons in this evolving field have been learned in the early years of synthetic data generation, and can be used in the development of new synthetic data products, considerably reducing the required investments. The final goal of the project described in this paper will be to evaluate whether synthetic data algorithms developed in the U.S. to generate a synthetic version of the Longitudinal Business Database (LBD) can easily be transferred to generate a similar data product for other countries. We construct a German data product with information comparable to the LBD - the German Longitudinal Business Database (GLBD) - that is generated from different administrative sources at the Institute for Employment Research, Germany. In a future step, the algorithms developed for the synthesis of the LBD will be applied to the GLBD. Extensive evaluations will illustrate whether the algorithms provide useful synthetic data without further adjustment. The ultimate goal of the project is to provide access to multiple synthetic datasets similar to the SynLBD at Cornell to enable comparative studies between countries. The Synthetic GLBD is a first step towards that goal.},
DOI = {10.3233/SJI-140812},
Keywords = {confidentiality; comparative studies; US Longitudinal Business Database; synthetic data},
Owner = {vilhuber},
Timestamp = {2014.03.24},
URL = {http://iospress.metapress.com/content/X415V18331Q33150}
}

@InProceedings{LagozeJCDL2014,
Title = {{CED2AR}: The Comprehensive Extensible Data Documentation and Access Repository},
Author = {Carl Lagoze and Lars Vilhuber and Jeremy Williams and Benjamin Perry and William C. Block},
Booktitle = {ACM/IEEE Joint Conference on Digital Libraries (JCDL 2014)},
Year = {2014},
Address = {London, United Kingdom},
Month = sep,
Note = {Presented at the ACM/IEEE Joint Conference on Digital Libraries (JCDL 2014), 8--12 September 2014},
Organization = {ACM/IEEE},
Abstract = {Social science researchers increasingly make use of data that is confidential because it contains linkages to the identities of people, corporations, etc. The value of this data lies in the ability to join the identifiable entities with external data such as genome data, geospatial information, and the like. However, the confidentiality of this data is a barrier to its utility and curation, making it difficult to fulfill US federal data management mandates and interfering with basic scholarly practices such as validation and reuse of existing results. We describe the complexity of the relationships among data that span a public and private divide. We then describe our work on the CED2AR prototype, a first step in providing researchers with a tool that spans this divide and makes it possible for them to search, access, and cite that data.},
Owner = {vilhuber},
Timestamp = {2014.07.09}
}

@InCollection{psd2014a,
Title = {Using Partially Synthetic Data to Replace Suppression in the {Business Dynamics Statistics}: Early Results},
Author = {Miranda, Javier and Vilhuber, Lars},
Booktitle = {Privacy in Statistical Databases},
Publisher = {Springer International Publishing},
Year = {2014},
Editor = {Domingo-Ferrer, Josep},
Pages = {232--242},
Series = {Lecture Notes in Computer Science},
Volume = {8744},
Abstract = {The Business Dynamics Statistics is a product of the U.S. Census Bureau that provides measures of business openings and closings, and job creation and destruction, by a variety of cross-classifications (firm and establishment age and size, industrial sector, and geography). Sensitive data are currently protected through suppression. However, as additional tabulations are being developed, at ever more detailed geographic levels, the number of suppressions increases dramatically. This paper explores the option of providing public-use data that are analytically valid and without suppressions, by leveraging synthetic data to replace observations in sensitive cells.},
DOI = {10.1007/978-3-319-11257-2_18},
ISBN = {978-3-319-11256-5},
Keywords = {synthetic data; statistical disclosure limitation; time-series; local labor markets; gross job flows; confidentiality protection},
URL = {https://doi.org/10.1007/978-3-319-11257-2_18}
}

J. Miranda and L. Vilhuber, “Looking Back On Three Years Of Using The Synthetic LBD Beta,” Statistical Journal of the IAOS: Journal of the International Association for Official Statistics, vol. 30, 2014. [DOI][URL][Bibtex]

@Article{SJIAOS-2014a,
Title = {Looking Back on Three Years of Using the {Synthetic LBD Beta}},
Author = {Miranda, Javier and Vilhuber, Lars},
Journal = {Statistical Journal of the IAOS: Journal of the International Association for Official Statistics},
Year = {2014},
Volume = {30},
Abstract = {Distributions of business data are typically much more skewed than those for household or individual data and public knowledge of the underlying units is greater. As a result, national statistical offices (NSOs) rarely release establishment or firm-level business microdata due to the risk to respondent confidentiality. One potential approach for overcoming these risks is to release synthetic data where the establishment data are simulated from statistical models designed to mimic the distributions of the real underlying microdata. The US Census Bureau's Center for Economic Studies in collaboration with Duke University, the National Institute of Statistical Sciences, and Cornell University made available a synthetic public use file for the Longitudinal Business Database (LBD) comprising more than 20 million records for all business establishment with paid employees dating back to 1976. The resulting product, dubbed the SynLBD, was released in 2010 and is the first-ever comprehensive business microdata set publicly released in the United States including data on establishments employment and payroll, birth and death years, and industrial classification. This paper documents the scope of projects that have requested and used the SynLBD.},
DOI = {10.3233/SJI-140811},
Keywords = {confidentiality; comparative studies; US Longitudinal Business Database; synthetic data},
Owner = {vilhuber},
Timestamp = {2014.03.24},
URL = {http://iospress.metapress.com/content/X415V18331Q33150}
}

@Article{AbowdSchneiderVilhuber2013,
Title = {Differential Privacy Applications to {Bayesian} and Linear Mixed Model Estimation},
Author = {Abowd, John M. and Schneider, Matthew J. and Vilhuber, Lars},
Journal = {Journal of Privacy and Confidentiality},
Year = {2013},
Note = {Article 4},
Number = {1},
Volume = {5},
Abstract = {We consider a particular maximum likelihood estimator (MLE) and a computationally intensive Bayesian method for differentially private estimation of the linear mixed-effects model (LMM) with normal random errors. The LMM is important because it is used in small-area estimation and detailed industry tabulations that present significant challenges for confidentiality protection of the underlying data. The differentially private MLE performs well compared to the regular MLE, and deteriorates as the protection increases for a problem in which the small-area variation is at the county level. More dimensions of random effects are needed to adequately represent the time dimension of the data, and for these cases the differentially private MLE cannot be computed. The direct Bayesian approach for the same model uses an informative, reasonably diffuse prior to compute the posterior predictive distribution for the random effects. The empirical differential privacy of this approach is estimated by direct computation of the relevant odds ratios after deleting influential observations according to various criteria.},
File = {AbowdSchneiderVilhuber2013.pdf:A/AbowdSchneiderVilhuber2013.pdf:PDF},
Owner = {vilhuber},
Timestamp = {2013.07.24},
URL = {http://repository.cmu.edu/jpc/vol5/iss1/4}
}

@Article{DBLP:journals/ijdc/LagozeBWAV13,
Title = {Data Management of Confidential Data},
Author = {Carl Lagoze and William C. Block and Jeremy Williams and John M. Abowd and Lars Vilhuber},
Journal = {International Journal of Digital Curation},
Year = {2013},
Number = {1},
Pages = {265--278},
Volume = {8},
Abstract = {Social science researchers increasingly make use of data that is confidential because it contains linkages to the identities of people, corporations, etc. The value of this data lies in the ability to join the identifiable entities with external data such as genome data, geospatial information, and the like. However, the confidentiality of this data is a barrier to its utility and curation, making it difficult to fulfill US federal data management mandates and interfering with basic scholarly practices such as validation and reuse of existing results. We describe the complexity of the relationships among data that span a public and private divide. We then describe our work on the CED2AR prototype, a first step in providing researchers with a tool that spans this divide and makes it possible for them to search, access, and cite that data.},
Bibsource = {DBLP, http://dblp.uni-trier.de},
Comment = {Presented at 8th International Digital Curation Conference 2013, Amsterdam. See also http://hdl.handle.net/1813/30924},
DOI = {10.2218/ijdc.v8i1.259},
Owner = {vilhuber},
Timestamp = {2013.10.09}
}

@InProceedings{LagozeEtAl2013,
Title = {Encoding Provenance of Social Science Data: Integrating {PROV} with {DDI}},
Author = {Carl Lagoze and William C. Block and Jeremy Williams and Lars Vilhuber},
Booktitle = {5th Annual European DDI User Conference},
Year = {2013},
Abstract = {Provenance is a key component of evaluating the integrity and reusability of data for scholarship. While recording and providing access provenance has always been important, it is even more critical in the web environment in which data from distributed sources and of varying integrity can be combined and derived. The PROV model, developed under the auspices of the W3C, is a foundation for semantically-rich, interoperable, and web-compatible provenance metadata. We report on the results of our experimentation with integrating the PROV model into the DDI metadata for a complex, but characteristic, example social science data. We also present some preliminary thinking on how to visualize those graphs in the user interface.},
Keywords = {Metadata, Provenance, DDI, eSocial Science},
Owner = {vilhuber},
Timestamp = {2013.10.09}
}

@Article{AbowdVilhuber2012,
Title = {Did the Housing Price Bubble Clobber Local Labor Market Job and Worker Flows When It Burst?},
Author = {John M. Abowd and Lars Vilhuber},
Journal = {American Economic Review},
Year = {2012},
Month = may,
Number = {3},
Pages = {589--593},
Volume = {102},
Abstract = {We use the Census Bureau's Quarterly Workforce Indicators and the Federal Housing Finance Agency's House Price Indices to study the effects of the housing price bubble on local labor markets. We show that the 35 MSAs in the top decile of the house price boom were most severely impacted. Their stable job employment fell much more than the national average. Their real wage rates did not fall as fast as the national average. Accessions fell much faster than average while separations were constant. Job creations fell substantially while destructions rose slightly.},
DOI = {10.1257/aer.102.3.589},
Owner = {vilhuber},
Timestamp = {2013.01.11},
URL = {http://ideas.repec.org/a/aea/aecrev/v102y2012i3p589-93.html}
}

@Article{AbowdVilhuber2010,
Title = {National Estimates of Gross Employment and Job Flows from the {Quarterly Workforce Indicators} with Demographic and Industry Detail},
Author = {John M. Abowd and Lars Vilhuber},
Journal = {Journal of Econometrics},
Year = {2011},
Pages = {82--99},
Volume = {161},
Abstract = {The Quarterly Workforce Indicators (QWI) are local labor market data produced and released every quarter by the United States Census Bureau. Unlike any other local labor market series produced in the US or the rest of the world, QWI measure employment flows for workers (accession and separations), jobs (creations and destructions) and earnings for demographic subgroups (age and gender), economic industry (NAICS industry groups), detailed geography (block (experimental), county, Core-Based Statistical Area, and Workforce Investment Area), and ownership (private, all) with fully interacted publication tables. The current QWI data cover 47 states, about 98\% of the private workforce in those states, and about 92\% of all private employment in the entire economy. State participation is sufficiently extensive to permit us to present the first national estimates constructed from these data. We focus on worker, job, and excess (churning) reallocation rates, rather than on levels of the basic variables. This permits a comparison to existing series from the Job Openings and Labor Turnover Survey and the Business Employment Dynamics Series from the Bureau of Labor Statistics (BLS). The national estimates from the QWI are an important enhancement to existing series because they include demographic and industry detail for both worker and job flow data compiled from underlying micro-data that have been integrated at the job and establishment levels by the Longitudinal Employer-Household Dynamics Program at the Census Bureau. The estimates presented herein were compiled exclusively from public-use data series and are available for download.},
Comment = {Final version published online: 4-MAR-2011},
DOI = {10.1016/j.jeconom.2010.09.008},
File = {AbowdVilhuber2010.pdf:A/AbowdVilhuber2010.pdf:PDF},
Owner = {vilhuber},
Timestamp = {2010.04.04}
}

@InProceedings{DostieMcKinneyVilhuber2009,
Title = {Using linked employer-employee data to investigate the speed of adjustment in downsizing firms in {Canada} and the {US}},
Author = {Dostie, Benoit and McKinney, Kevin L. and Vilhuber, Lars},
Booktitle = {International Census Research Data Center Conference},
Year = {2009},
Address = {Ithaca, NY},
Month = oct,
Abstract = {When firms are faced with a demand shock, adjustment can take many forms. Firms can adjust physical capital, human capital, or
both. The speed of adjustment may differ as well: costs of adjustment, the type of shock, the legal and economic environment all matter.
In this paper, we focus on firms that downsized between 1992 and 1997, but ultimately survive, and investigate how the human capital distribution within
a firm influences the speed of adjustment, \textit{ceteris paribus}. In other words, when do firms use mass layoffs instead of attrition to adjust the level
of employment.
We combine worker-level wage records and measures of human capital with firm-level characteristics of the production function, and use levels and
changes in these variables to characterize the choice of adjustment method and speed. Firms are described/compared up to 9 years prior to death. We also
consider how workers fare after leaving downsizing firms, and analyze if observed differences in post-separation outcomes of workers provide clues to
the choice of adjustment speed.},
File = {McKinneyVilhuber2006-ESEM2006.pdf:/home/vilhuber/Textes/Papers/Census/CAFE-displacement-death/releases/2006-05-24/McKinneyVilhuber2006-ESEM2006.pdf:PDF},
Institution = {U.S. Census Bureau, LEHD and Cornell University},
Journal = {Conference on Research in Income and Wealth},
Owner = {vilhuber},
Type = {mimeo}
}

2006

K. L. McKinney and L. Vilhuber, “Using linked employer-employee data to investigate the speed of adjustment in downsizing firms,” in Conference on the Analysis of Firms and Employees (CAFE), Nuremberg, Germany, 2006. [Bibtex]

@InProceedings{McKinneyVilhuber2006,
Title = {Using linked employer-employee data to investigate the speed of adjustment in downsizing firms},
Author = {McKinney, Kevin L. and Vilhuber, Lars},
Booktitle = {Conference on the Analysis of Firms and Employees (CAFE)},
Year = {2006},
Address = {Nuremberg, Germany},
Month = sep,
Abstract = {When firms are faced with a demand shock, adjustment can take many forms. Firms can adjust physical capital, human capital, or
both. The speed of adjustment may differ as well: costs of adjustment, the type of shock, the legal and economic environment all matter.
In this paper, we focus on firms that downsized between 1992 and 1997, but ultimately survive, and investigate how the human capital distribution within
a firm influences the speed of adjustment, \textit{ceteris paribus}. In other words, when do firms use mass layoffs instead of attrition to adjust the level
of employment.
We combine worker-level wage records and measures of human capital with firm-level characteristics of the production function, and use levels and
changes in these variables to characterize the choice of adjustment method and speed. Firms are described/compared up to 9 years prior to death. We also
consider how workers fare after leaving downsizing firms, and analyze if observed differences in post-separation outcomes of workers provide clues to
the choice of adjustment speed.},
File = {McKinneyVilhuber2006-ESEM2006.pdf:/home/vilhuber/Textes/Papers/Census/CAFE-displacement-death/releases/2006-05-24/McKinneyVilhuber2006-ESEM2006.pdf:PDF},
Institution = {U.S. Census Bureau, LEHD and Cornell University},
Journal = {Conference on Research in Income and Wealth},
Owner = {vilhuber},
Type = {mimeo}
}

Working papers

2017

A. S. Green, M. J. Kutzbach, and L. Vilhuber, “Two Perspectives on Commuting: A Comparison of Home to Work Flows Across Job-Linked Survey and Administrative Files,” Center for Economic Studies, U.S. Census Bureau, Working Papers 17-34, 2017. [URL][Bibtex]

@TechReport{RePEc:cen:wpaper:17-34,
author = {Andrew S. Green and Mark J. Kutzbach and Lars Vilhuber},
title = {Two Perspectives on Commuting: A Comparison of Home to Work Flows Across Job-Linked Survey and Administrative Files},
year = {2017},
month = jan,
institution = {Center for Economic Studies, U.S. Census Bureau},
type = {Working Papers},
url = {https://ideas.repec.org/p/cen/wpaper/17-34.html},
number = {17-34},
abstract = {Commuting flows and workplace employment data have a wide constituency of users including urban and regional planners, social science and transportation researchers, and businesses. The U.S. Census Bureau releases two, national data products that give the magnitude and characteristics of home to work flows. The American Community Survey (ACS) tabulates households' responses on employment, workplace, and commuting behavior. The Longitudinal Employer-Household Dynamics (LEHD) program tabulates administrative records on jobs in the LEHD Origin-Destination Employment Statistics (LODES). Design differences across the datasets lead to divergence in a comparable statistic: county-to-county aggregate commute flows. To understand differences in the public use data, this study compares ACS and LEHD source files, using identifying information and probabilistic matching to join person and job records. In our assessment, we compare commuting statistics for job frames linked on person, employment status, employer, and workplace and we identify person and job characteristics as well as design features of the data frames that explain aggregate differences. We find a lower rate of within-county commuting and farther commutes in LODES. We attribute these greater distances to differences in workplace reporting and to uncertainty of establishment assignments in LEHD for workers at multi-unit employers. Minor contributing factors include differences in residence location and ACS workplace edits. The results of this analysis and the data infrastructure developed will support further work to understand and enhance commuting statistics in both datasets.},
keywords = {U.S. Census Bureau; LEHD; LODES; ACS; Employer-employee matched data; Commuting; Record linkage},
}

@techreport{vilhuber-abowd-reiter-2016-ecommons,
Title = {Synthetic Establishment Microdata Around the World},
Author = {Lars Vilhuber and John M. Abowd and Jerome P. Reiter},
institution = {NSF Census Research Network - NCRN-Cornell},
Year = {2016},
number = {1813:42340},
Abstract = {In contrast to the many public-use microdata samples available for individual and household data from many statistical agencies around the world, there are virtually no establishment or firm microdata available. In large part, this difficulty in providing access to business micro data is due to the skewed and sparse distributions that characterize business data. Synthetic data are simulated data generated from statistical models. We organized sessions at the 2015 World Statistical Congress and the 2015 Joint Statistical Meetings, highlighting work on synthetic establishment microdata. This overview situates those papers, published in this issue, within the broader literature.},
Keywords = {confidentiality; comparative studies; US Longitudinal Business Database; synthetic data},
Owner = {vilhuber},
Timestamp = {2014.03.24},
URL = {http://hdl.handle.net/1813/42340}
}

2014

J. Drechsler and L. Vilhuber, “A First Step Towards A German SynLBD: Constructing A German Longitudinal Business Database,” Center for Economic Studies, U.S. Census Bureau, Working Papers 14-13, 2014. [URL][Bibtex]

@TechReport{RePEc:cen:wpaper:14-13,
Title = {A First Step Towards a {German} {SynLBD}: Constructing a {German} {Longitudinal} {Business} {Database}},
Author = {J{\"o}rg Drechsler and Lars Vilhuber},
Institution = {Center for Economic Studies, U.S. Census Bureau},
Year = {2014},
Month = feb,
Number = {14-13},
Type = {Working Papers},
Abstract = {One major criticism against the use of synthetic data has been that the efforts necessary to generate useful synthetic data are so intense that many statistical agencies cannot afford them. We argue many lessons in this evolving field have been learned in the early years of synthetic data generation, and can be used in the development of new synthetic data products, considerably reducing the required investments. The final goal of the project described in this paper will be to evaluate whether synthetic data algorithms developed in the U.S. to generate a synthetic version of the Longitudinal Business Database (LBD) can easily be transferred to generate a similar data product for other countries. We construct a German data product with information comparable to the LBD - the German Longitudinal Business Database (GLBD) - that is generated from different administrative sources at the Institute for Employment Research, Germany. In a future step, the algorithms developed for the synthesis of the LBD will be applied to the GLBD. Extensive evaluations will illustrate whether the algorithms provide useful synthetic data without further adjustment. The ultimate goal of the project is to provide access to multiple synthetic datasets similar to the SynLBD at Cornell to enable comparative studies between countries. The Synthetic GLBD is a first step towards that goal.},
Keywords = {confidentiality; comparative studies; German Longitudinal Business Database; synthetic data},
Owner = {vilhuber},
Timestamp = {2014.03.24},
URL = {http://ideas.repec.org/p/cen/wpaper/14-13.html}
}

J. Miranda and L. Vilhuber, “Looking Back On Three Years Of Using The Synthetic LBD Beta,” Center for Economic Studies, U.S. Census Bureau, Working Papers 14-11, 2014. [URL][Bibtex]

@TechReport{RePEc:cen:wpaper:14-11,
Title = {Looking Back on Three Years of Using the {Synthetic LBD Beta}},
Author = {Miranda, Javier and Vilhuber, Lars},
Institution = {Center for Economic Studies, U.S. Census Bureau},
Year = {2014},
Month = feb,
Number = {14-11},
Type = {Working Papers},
Abstract = {Distributions of business data are typically much more skewed than those for household or individual data and public knowledge of the underlying units is greater. As a result, national statistical offices (NSOs) rarely release establishment or firm-level business microdata due to the risk to respondent confidentiality. One potential approach for overcoming these risks is to release synthetic data where the establishment data are simulated from statistical models designed to mimic the distributions of the real underlying microdata. The US Census Bureau's Center for Economic Studies in collaboration with Duke University, the National Institute of Statistical Sciences, and Cornell University made available a synthetic public use file for the Longitudinal Business Database (LBD) comprising more than 20 million records for all business establishment with paid employees dating back to 1976. The resulting product, dubbed the SynLBD, was released in 2010 and is the first-ever comprehensive business microdata set publicly released in the United States including data on establishments employment and payroll, birth and death years, and industrial classification. This paper documents the scope of projects that have requested and used the SynLBD.},
Keywords = {confidentiality; comparative studies; US Longitudinal Business Database; synthetic data},
Owner = {vilhuber},
Timestamp = {2014.03.24},
URL = {http://ideas.repec.org/p/cen/wpaper/14-11.html}
}

@TechReport{RePEc:cen:wpaper:14-26,
Title = {{LEHD} Infrastructure files in the {Census} {RDC} -- Overview},
Author = {Lars Vilhuber and Kevin McKinney},
Institution = {Center for Economic Studies, U.S. Census Bureau},
Year = {2014},
Month = jun,
Number = {14-26},
Type = {Working Papers},
Abstract = {The Longitudinal Employer-Household Dynamics (LEHD) Program at the U.S. Census Bureau, with the support of several national research agencies, maintains a set of infrastructure files using administrative data provided by state agencies, enhanced with information from other administrative data sources, demographic and economic (business) surveys and censuses. The LEHD Infrastructure Files provide a detailed and comprehensive picture of workers, employers, and their interaction in the U.S. economy. This document describes the structure and content of the 2011 Snapshot of the LEHD Infrastructure files as they are made available in the Census Bureau's secure and restricted-access Research Data Center network. The document attempts to provide a comprehensive description of all researcher-accessible files, of their creation, and of any modifications made to the files to facilitate researcher access.},
URL = {http://ideas.repec.org/p/cen/wpaper/14-26.html}
}

@TechReport{BerubeDostieVilhuber2013,
Title = {Estimation de la contribution de la r{\'e}allocation de la main-d'oeuvre {\`a} la croissance de la productivit{\'e} au {Canada}},
Author = {B{\'e}rub{\'e}, Charles and Benoit Dostie and Lars Vilhuber},
Institution = {Centre sur la productivit{\'e} et la prosp{\'e}rit{\'e}, HEC Montr{\'e}al},
Year = {2013},
Abstract = {In this report, we estimate the contribution of labour reallocation to productivity growth in the Canadian manufacturing sector. We find that most of productivity growth comes from within-firm improvements, leaving a limited role for labour reallocation. Still, we also find that the importance of labour reallocation increases over time. This is both due to increasing net-entry and inter-firm effects. These effects are much more important post 2000 than in the 1990s. We also find that lost production from exiting firms is now most likely replaced by production from existing firms, while previously, it was more likely to be replaced by production from new firms. (French only)},
Owner = {vilhuber},
Timestamp = {2013.09.20},
URL = {http://cpp.hec.ca/cms/assets/documents/recherches_publiees/CH_2012_01.pdf}
}

@TechReport{Vilhuber2013,
Title = {Methods for Protecting the Confidentiality of Firm-Level Data: {Issues} and Solutions},
Author = {Lars Vilhuber},
Institution = {Labor Dynamics Institute},
Year = {2013},
Month = mar,
Number = {19},
Type = {Document},
Abstract = {This report will provide an overview of methods used by statistical agencies to encourage, support, and
enhance research access to data for the purpose of generating new knowledge. Quite a few reports and
scientific articles have addressed the issue before, and we will be highly indebted to that literature. To a
summary of that literature, we hope to provide some recent developments and experiences derived
from a decade of working with systems that increase access as both researchers as well as data
providers. The report will focus on the data provided by statistical agencies, but it should be understood
that government agencies other than a National Statistical Office (NSO) may acquire that function.
While excluding the legal background limiting or permitting such data collection and provision, we
will highlight some alternate sources and methods, prior to concluding.},
Owner = {vilhuber},
Timestamp = {2013.09.20},
URL = {http://digitalcommons.ilr.cornell.edu/ldi/19/}
}

2011

@TechReport{RePEc:cen:wpaper:11-13,
Title = {{LEHD} Infrastructure Files in the {Census} {RDC}: Overview of {S2004} Snapshot},
Author = {Kevin McKinney and Lars Vilhuber},
Institution = {Center for Economic Studies, U.S. Census Bureau},
Year = {2011},
Month = apr,
Number = {11-13},
Type = {Working Papers},
Abstract = {The Longitudinal Employer-Household Dynamics (LEHD) Program at the U.S. Census Bureau, with the support of several national research agencies, has built a set of infrastructure files using administrative data provided by state agencies, enhanced with information from other administrative data sources, demographic and economic (business) surveys and censuses. The LEHD Infrastructure Files provide a detailed and comprehensive picture of workers, employers, and their interaction in the U.S. economy. This document describes the structure and content of the 2004 Snapshot of the LEHD Infrastructure files as they are made available in the Census Bureau's Research Data Center network.},
Keywords = {LEHD; linked employer-employee data; workers; employers; jobs; hires; separations; recalls; mobility},
URL = {http://ideas.repec.org/p/cen/wpaper/11-13.html}
}

@TechReport{ces-wp-10-11,
Title = {National Estimates of Gross Employment and Job Flows from the {Quarterly} {Workforce} {Indicators} with Demographic and Industry Detail (with color graphs)},
Author = {John M. Abowd and Lars Vilhuber},
Institution = {Center for Economic Studies, U.S. Census Bureau},
Year = {2010},
Month = jun,
Number = {10-11},
Type = {Working Papers},
Abstract = {The Quarterly Workforce Indicators (QWI) are local labor market data produced and released every quarter by the United States Census Bureau. Unlike any other local labor market series produced in the U.S. or the rest of the world, the QWI measure employment flows for workers (accessions and separations), jobs (creations and destructions) and earnings for demographic subgroups (age and gender), economic industry (NAICS industry groups), detailed geography (block (experimental), county, Core-Based Statistical Area, and Workforce Investment Area), and ownership (private, all) with fully interacted publication tables. The current QWI data cover 47 states, about 98\% of the private workforce in those states, and about 92\% of all private employment in the entire economy. State participation is sufficiently extensive to permit us to present the first national estimates constructed from these data. We focus on worker, job, and excess (churning) reallocation rates, rather than on levels of the basic variables. This permits comparison to existing series from the Job Openings and Labor Turnover Survey and the Business Employment Dynamics Series from the Bureau of Labor Statistics. The national estimates from the QWI are an important enhancement to existing series because they include demographic and industry detail for both worker and job flow data compiled from underlying micro-data that have been integrated at the job and establishment levels by the Longitudinal Employer-Household Dynamics Program at the Census Bureau. The estimates presented herein were compiled exclusively from public-use data series and are available for download.},
URL = {http://ideas.repec.org/p/cen/wpaper/10-11.html}
}

1999

@TechReport{Vilhuber99b,
Title = {Continuous Training and Sectoral Mobility in {Germany}},
Author = {Lars Vilhuber},
Institution = {CIRANO},
Year = {1999},
Number = {99s-03},
Type = {Scientific Series},
Abstract = {This article studies mobility patterns of German workers in light of a model of sector-specific human capital. Furthermore, I employ and describe little-used data on continuous on-the-job training occurring after apprenticeships. Results are presented describing the incidence and duration of continuous training. Continuous training is quite common, despite the high incidence of apprenticeships which precedes this part of a worker's career. Most previous studies have only distinguished between firm-specific and general human capital, generally concluding that training was general. Inconsistent with those conclusions, I show that German men are more likely to find a job within the same sector if they have received continuous training in that sector. These results are similar to results obtained for young U.S. workers, and suggest that sector-specific capital is an important feature of very different labor markets. Furthermore, the results suggest that the observed effect of training on mobility is sensitive to the state of the business cycle, indicating a more complex interaction between supply and demand than most theoretical models allow for.},
Keywords = {training; mobility; Germany}
}

1997

@TechReport{Vilhuber97a,
Title = {Sector-Specific On-The-Job Training: Evidence from {U.S.} Data},
Author = {Lars Vilhuber},
Institution = {CIRANO},
Year = {1997},
Number = {97s-42},
Type = {Scientific Series},
Abstract = {Using data from the National Longitudinal Survey of Youth (NLSY), we re-examine the effect of formal on-the-job training on mobility patterns of young American workers. By employing parametric duration models, we evaluate the economic impact of training on productive time with an employer. Confirming previous studies, we find a positive and statistically significant impact of formal on-the-job training on tenure with the employer providing the training. However, expected duration net of the time spent in the training program is generally not significantly increased. We proceed to document and analyze intra-sectoral and cross-sectoral mobility patterns in order to infer whether training provides firm-specific, industry-specific, or general human capital. The econometric analysis rejects a sequential model of job separation in favor of a competing risks specification. We find significant evidence for the industry-specificity of training. The probability of sectoral mobility upon job separation decreases with training received in the current industry, whether with the last employer or previous employers, and employment attachment increases with on-the-job training. These results are robust to a number of variations on the base model.},
Jelclass = {J4 J6}
}

1996

@TechReport{Vilhuber96,
Title = {Wage Flexibility and Contract Structure in {Germany}},
Author = {Lars Vilhuber},
Institution = {CIRANO},
Year = {1996},
Number = {96s-28},
Type = {Scientific Series},
Abstract = {In this paper, we look at how labor market conditions at different points during the tenure of individuals with firms are correlated with current earnings. Using data from the German Socioeconomic Panel on individuals for the period 1984 to 1994, we find that both the contemporaneous unemployment rate and prior values of the unemployment rate are significantly correlated with current earnings, contrary to results for the American labor market. We interpret this result as evidence that German unions do in fact bargain over both wages and employment, but that the models of individualistic contracts, such as the implicit contract model, may explain some of the observed wage drift and longer-term wage movements reasonably well. Furthermore, we explore the heterogeneity of contracts over a variety of worker and job characteristics. In particular, we find evidence that contracts differ across industries and across firm size. Workers of large firms are remarkably more insulated from the job market than workers for any other type of firm, indicating the importance of internal job markets.},
Jelclass = {J23, J31, J41},
Mylibrary = {yes}
}