Kevin Duh at JHU

BibTeX Entries

  
  
  
  
  
  @incollection{zhang-duh-2023-sign-language,
    author    = {Zhang, Xuan and Duh, Kevin},
    title     = {Sign Language Gloss Translation},
    editor    = {Way, Andy and Leeson, Loraine and Shterionov, Dimitar and Rathmann, Christian},
    booktitle = {Sign Language Machine Translation},
    publisher = {Springer},
    year      = {2023},
    url       = {https://link.springer.com/book/9783031473616},
  }
  
  
@inproceedings{sia24where,
  author    = {Sia, Suzanna and Mueller, David and Duh, Kevin},
  title     = {Where does In-Context Learning Happen in Large Language Models?},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {37},
  publisher = {Curran Associates, Inc.},
  year      = {2024},
}
  
  
  @inproceedings{cai24privacy,
    author    = {Cai, Zexin and
                 Li Xinyuan, Henry and
                 Garg, Ashi and
                 Garc{\'\i}a-Perera, Leibny Paola and
                 Duh, Kevin and
                 Khudanpur, Sanjeev and
                 Andrews, Nicholas and
                 Wiesner, Matthew},
    title     = {Privacy versus Emotion Preservation Trade-offs in Emotion-Preserving Speaker Anonymization},
    booktitle = {IEEE Spoken Language Technology Workshop},
    year      = {2024},
  }


  
@inproceedings{sia-etal-2024-anti,
  author    = {Sia, Suzanna and DeLucia, Alexandra and Duh, Kevin},
  title     = {Anti-{LM} Decoding for Zero-shot In-context Machine Translation},
  editor    = {Duh, Kevin and Gomez, Helena and Bethard, Steven},
  booktitle = {Findings of the Association for Computational Linguistics: NAACL 2024},
  publisher = {Association for Computational Linguistics},
  address   = {Mexico City, Mexico},
  month     = jun,
  year      = {2024},
  pages     = {3403--3420},
  doi       = {10.18653/v1/2024.findings-naacl.216},
  url       = {https://aclanthology.org/2024.findings-naacl.216},
  abstract  = {Zero-shot In-context learning is the phenomenon where models can perform a task given only the instructions. However, pre-trained large language models are known to be poorly calibrated for zero-shot tasks. One of the most effective approaches to handling this bias is to adopt a contrastive decoding objective, which accounts for the prior probability of generating the next token by conditioning on a context. This work introduces an Anti-Language Model objective with a decay factor designed to address the weaknesses of In-context Machine Translation. We conduct our experiments across 3 model types and sizes, 3 language directions, and for both greedy decoding and beam search. The proposed method outperforms other state-of-the-art decoding objectives, with up to 20 BLEU point improvement from the default objective in some settings.},
}
  
  
@inproceedings{han-etal-2024-speechqe,
  author    = {Han, HyoJung and Duh, Kevin and Carpuat, Marine},
  title     = {{S}peech{QE}: Estimating the Quality of Direct Speech Translation},
  editor    = {Al-Onaizan, Yaser and Bansal, Mohit and Chen, Yun-Nung},
  booktitle = {Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing},
  publisher = {Association for Computational Linguistics},
  address   = {Miami, Florida, USA},
  month     = nov,
  year      = {2024},
  pages     = {21852--21867},
  doi       = {10.18653/v1/2024.emnlp-main.1218},
  url       = {https://aclanthology.org/2024.emnlp-main.1218},
  abstract  = {Recent advances in automatic quality estimation for machine translation have exclusively focused on written language, leaving the speech modality underexplored. In this work, we formulate the task of quality estimation for speech translation (SpeechQE), construct a benchmark, and evaluate a family of systems based on cascaded and end-to-end architectures. In this process, we introduce a novel end-to-end system leveraging pre-trained text LLM. Results suggest that end-to-end approaches are better suited to estimating the quality of direct speech translation than using quality estimation systems designed for text in cascaded systems. More broadly, we argue that quality estimation of speech translation needs to be studied as a separate problem from that of text, and release our [data and models](https://github.com/h-j-han/SpeechQE) to guide further research in this space.},
}
  
  
@inproceedings{bamfo-odoom-etal-2024-synthetic,
  author    = {Bamfo Odoom, Bismarck and
               Robinson, Nathaniel and
               Rippeth, Elijah and
               Tavarez-Arce, Luis and
               Murray, Kenton and
               Wiesner, Matthew and
               McNamee, Paul and
               Koehn, Philipp and
               Duh, Kevin},
  title     = {Can Synthetic Speech Improve End-to-End Conversational Speech Translation?},
  editor    = {Knowles, Rebecca and Eriguchi, Akiko and Goel, Shivali},
  booktitle = {Proceedings of the 16th Conference of the Association for Machine Translation in the Americas (Volume 1: Research Track)},
  publisher = {Association for Machine Translation in the Americas},
  address   = {Chicago, USA},
  month     = sep,
  year      = {2024},
  pages     = {167--177},
  url       = {https://aclanthology.org/2024.amta-research.15},
  abstract  = {Conversational speech translation is an important technology that fosters communication among people of different language backgrounds. Three-way parallel data in the form of source speech, source transcript, and target translation is usually required to train end-to-end systems. However, such datasets are not readily available and are expensive to create as this involves multiple annotation stages. In this paper, we investigate the use of synthetic data from generative models, namely machine translation and text-to-speech synthesis, for training conversational speech translation systems. We show that adding synthetic data to the training recipe increasingly improves end-to-end training performance, especially when limited real data is available. However, when no real data is available, no amount of synthetic data helps.},
}
  
  
@inproceedings{zhang-duh-2024-best,
  author    = {Zhang, Xuan and Duh, Kevin},
  title     = {Best Practices of Successive Halving on Neural Machine Translation and Large Language Models},
  editor    = {Knowles, Rebecca and Eriguchi, Akiko and Goel, Shivali},
  booktitle = {Proceedings of the 16th Conference of the Association for Machine Translation in the Americas (Volume 1: Research Track)},
  publisher = {Association for Machine Translation in the Americas},
  address   = {Chicago, USA},
  month     = sep,
  year      = {2024},
  pages     = {130--139},
  url       = {https://aclanthology.org/2024.amta-research.12},
  abstract  = {Hyperparameter optimization (HPO) enhances neural machine translation (NMT) models but demands substantial computational resources. Successive halving, a multi-fidelity HPO method, mitigates this by early stopping unpromising models and allocating more resources to promising ones. This method is particularly relevant for NMT and large language models, which are computationally intensive. However, successive halving relies on a noisy estimation of model performance and assumes that early performance is highly correlated with final performance. We introduce a table lookup benchmark dataset to study the reliability of successive halving and propose best practices for its application in NMT and large language models.},
}
  
  
@inproceedings{verma-etal-2024-exploring,
  author    = {Verma, Neha and Murray, Kenton and Duh, Kevin},
  title     = {Exploring Geometric Representational Disparities between Multilingual and Bilingual Translation Models},
  editor    = {Calzolari, Nicoletta and
               Kan, Min-Yen and
               Hoste, Veronique and
               Lenci, Alessandro and
               Sakti, Sakriani and
               Xue, Nianwen},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
  publisher = {ELRA and ICCL},
  address   = {Torino, Italia},
  month     = may,
  year      = {2024},
  pages     = {6909--6921},
  url       = {https://aclanthology.org/2024.lrec-main.604},
  abstract  = {Multilingual machine translation has proven immensely useful for both parameter efficiency and overall performance across many language pairs via complete multilingual parameter sharing. However, some language pairs in multilingual models can see worse performance than in bilingual models, especially in the one-to-many translation setting. Motivated by their empirical differences, we examine the geometric differences in representations from bilingual models versus those from one-to-many multilingual models. Specifically, we compute the isotropy of these representations using intrinsic dimensionality and IsoScore, in order to measure how the representations utilize the dimensions in their underlying vector space. Using the same evaluation data in both models, we find that for a given language pair, its multilingual model decoder representations are consistently less isotropic and occupy fewer dimensions than comparable bilingual model decoder representations. Additionally, we show that much of the anisotropy in multilingual decoder representations can be attributed to modeling language-specific information, therefore limiting remaining representational capacity.},
}
  
  
@inproceedings{viechnicki-etal-2024-large,
  author    = {Viechnicki, Peter and Duh, Kevin and Kostacos, Anthony and Landau, Barbara},
  title     = {Large-Scale Bitext Corpora Provide New Evidence for Cognitive Representations of Spatial Terms},
  editor    = {Graham, Yvette and Purver, Matthew},
  booktitle = {Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)},
  publisher = {Association for Computational Linguistics},
  address   = {St. Julian{'}s, Malta},
  month     = mar,
  year      = {2024},
  pages     = {1089--1099},
  url       = {https://aclanthology.org/2024.eacl-long.66},
  abstract  = {Recent evidence from cognitive science suggests that there exist two classes of cognitive representations within the spatial terms of a language, one represented geometrically (e.g., above, below) and the other functionally (e.g., on, in). It has been hypothesized that geometric terms are more constrained and are mastered relatively early in language learning, whereas functional terms are less constrained and are mastered over longer time periods (Landau, 2016). One consequence of this hypothesis is that these two classes should exhibit different cross-linguistic variability, which is supported by human elicitation studies. In this work we present to our knowledge the first corpus-based empirical test of this hypothesis. We develop a pipeline for extracting, isolating, and aligning spatial terms in basic locative constructions from parallel text. Using Shannon entropy to measure the variability of spatial term use across eight languages, we find supporting evidence that variability in functional terms differs significantly from that of geometric terms. We also perform latent variable modeling and find support for the division of spatial terms into geometric and functional classes.},
}

  
  
@inproceedings{zhang-duh-2023-handshape,
  author    = {Zhang, Xuan and Duh, Kevin},
  title     = {Handshape-Aware Sign Language Recognition: Extended Datasets and Exploration of Handshape-Inclusive Methods},
  editor    = {Bouamor, Houda and Pino, Juan and Bali, Kalika},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2023},
  publisher = {Association for Computational Linguistics},
  address   = {Singapore},
  month     = dec,
  year      = {2023},
  pages     = {2993--3002},
  doi       = {10.18653/v1/2023.findings-emnlp.198},
  url       = {https://aclanthology.org/2023.findings-emnlp.198},
  abstract  = {The majority of existing work on sign language recognition encodes signed videos without explicitly acknowledging the phonological attributes of signs. Given that handshape is a vital parameter in sign languages, we explore the potential of handshape-aware sign language recognition. We augment the PHOENIX14T dataset with gloss-level handshape labels, resulting in the new PHOENIX14T-HS dataset. Two unique methods are proposed for handshape-inclusive sign language recognition: a single-encoder network and a dual-encoder network, complemented by a training strategy that simultaneously optimizes both the CTC loss and frame-level cross-entropy loss. The proposed methodology consistently outperforms the baseline performance. The dataset and code can be accessed at: www.anonymous.com.},
}

  
@inproceedings{zhang-etal-2023-machine,
  author    = {Zhang, Xuan and Rajabi, Navid and Duh, Kevin and Koehn, Philipp},
  title     = {Machine Translation with Large Language Models: Prompting, Few-shot Learning, and Fine-tuning with {QL}o{RA}},
  editor    = {Koehn, Philipp and Haddow, Barry and Kocmi, Tom and Monz, Christof},
  booktitle = {Proceedings of the Eighth Conference on Machine Translation},
  publisher = {Association for Computational Linguistics},
  address   = {Singapore},
  month     = dec,
  year      = {2023},
  pages     = {468--481},
  doi       = {10.18653/v1/2023.wmt-1.43},
  url       = {https://aclanthology.org/2023.wmt-1.43},
  abstract  = {While large language models have made remarkable advancements in natural language generation, their potential in machine translation, especially when fine-tuned, remains under-explored. In our study, we conduct comprehensive experiments, evaluating 15 publicly available language models on machine translation tasks. We compare the performance across three methodologies: zero-shot prompting, few-shot learning, and fine-tuning. Central to our approach is the use of QLoRA, an efficient fine-tuning method. On French-English, QLoRA fine-tuning outperforms both few-shot learning and models trained from scratch. This superiority is highlighted in both sentence-level and document-level translations, with a significant BLEU score improvement of 28.93 over the prompting method. Impressively, with QLoRA, the enhanced performance is achieved by fine-tuning a mere 0.77{\%} of the model{'}s parameters.},
}
  
  
  @inproceedings{zhang-etal-2023-hyperparameter,
    author    = {Zhang, Xuan and Duh, Kevin and McNamee, Paul},
    title     = {A Hyperparameter Optimization Toolkit for Neural Machine Translation Research},
    booktitle = {Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 3: System Demonstrations)},
    publisher = {Association for Computational Linguistics},
    address   = {Toronto, Canada},
    month     = jul,
    year      = {2023},
    pages     = {161--168},
    url       = {https://aclanthology.org/2023.acl-demo.15},
    abstract  = {Hyperparameter optimization is an important but often overlooked process in the research of deep learning technologies. To obtain a good model, one must carefully tune hyperparameters that determine the architecture and training algorithm. Insufficient tuning may result in poor results, while inequitable tuning may lead to exaggerated differences between models. We present a hyperparameter optimization toolkit for neural machine translation (NMT) to help researchers focus their time on the creative rather than the mundane. The toolkit is implemented as a wrapper on top of the open-source Sockeye NMT software. Using the Asynchronous Successive Halving Algorithm (ASHA), we demonstrate that it is possible to discover near-optimal models under a computational budget with little effort. Code: https://github.com/kevinduh/sockeye-recipes3 Video demo: https://cs.jhu.edu/{\textasciitilde}kevinduh/j/demo.mp4},
  }

  
  @inproceedings{xiao23hk,
    author    = {Xiao, Cihan and
                 Xinyuan, Henry Li and
                 Yang, Jinyi and
                 Gao, Dongji and
                 Wiesner, Matthew and
                 Duh, Kevin and
                 Khudanpur, Sanjeev},
    title     = {{HK-LegiCoST}: Leveraging Non-Verbatim Transcripts for Speech Translation},
    booktitle = {Interspeech 2023, 24th Annual Conference of the International Speech Communication Association},
    year      = {2023},
  }
  
  @inproceedings{gwinnup-etal-2023-enhancing,
    author    = {Gwinnup, Jeremy and Anderson, Tim and Ore, Brian and Hansen, Eric and Duh, Kevin},
    title     = {Enhancing Video Translation Context with Object Labels},
    booktitle = {Proceedings of the 20th International Conference on Spoken Language Translation (IWSLT 2023)},
    publisher = {Association for Computational Linguistics},
    address   = {Toronto, Canada (in-person and online)},
    month     = jul,
    year      = {2023},
    pages     = {130--137},
    url       = {https://aclanthology.org/2023.iwslt-1.8},
    abstract  = {We present a simple yet efficient method to enhance the quality of machine translation models trained on multimodal corpora by augmenting the training text with labels of detected objects in the corresponding video segments. We then test the effects of label augmentation in both baseline and two automatic speech recognition (ASR) conditions. In contrast with multimodal techniques that merge visual and textual features, our modular method is easy to implement and the results are more interpretable. Comparisons are made with Transformer translation architectures trained with baseline and augmented labels, showing improvements of up to +1.0 BLEU on the How2 dataset.},
  }
  
  @inproceedings{agrawal-etal-2023-findings,
    author    = {Agarwal, Milind and
                 Agrawal, Sweta and
                 Anastasopoulos, Antonios and
                 Bentivogli, Luisa and
                 Bojar, Ond{\v{r}}ej and
                 Borg, Claudia and
                 Carpuat, Marine and
                 Cattoni, Roldano and
                 Cettolo, Mauro and
                 Chen, Mingda and
                 Chen, William and
                 Choukri, Khalid and
                 Chronopoulou, Alexandra and
                 Currey, Anna and
                 Declerck, Thierry and
                 Dong, Qianqian and
                 Duh, Kevin and
                 Est{\`e}ve, Yannick and
                 Federico, Marcello and
                 Gahbiche, Souhir and
                 Haddow, Barry and
                 Hsu, Benjamin and
                 Htut, Phu Mon and
                 Inaguma, Hirofumi and
                 Javorsk{\'y}, D{\'a}vid and
                 Judge, John and
                 Kano, Yasumasa and
                 Ko, Tom and
                 Kumar, Rishu and
                 Li, Pengwei and
                 Ma, Xutai and
                 Mathur, Prashant and
                 Matusov, Evgeny and
                 McNamee, Paul and
                 McCrae, John P. and
                 Murray, Kenton and
                 Nadejde, Maria and
                 Nakamura, Satoshi and
                 Negri, Matteo and
                 Nguyen, Ha and
                 Niehues, Jan and
                 Niu, Xing and
                 Ojha, Atul Kr. and
                 Ortega, John E. and
                 Pal, Proyag and
                 Pino, Juan and
                 van der Plas, Lonneke and
                 Pol{\'a}k, Peter and
                 Rippeth, Elijah and
                 Salesky, Elizabeth and
                 Shi, Jiatong and
                 Sperber, Matthias and
                 St{\"u}ker, Sebastian and
                 Sudoh, Katsuhito and
                 Tang, Yun and
                 Thompson, Brian and
                 Tran, Kevin and
                 Turchi, Marco and
                 Waibel, Alex and
                 Wang, Mingxuan and
                 Watanabe, Shinji and
                 Zevallos, Rodolfo},
    title     = {{FINDINGS} {OF} {THE} {IWSLT} 2023 {EVALUATION} {CAMPAIGN}},
    booktitle = {Proceedings of the 20th International Conference on Spoken Language Translation (IWSLT 2023)},
    publisher = {Association for Computational Linguistics},
    address   = {Toronto, Canada (in-person and online)},
    month     = jul,
    year      = {2023},
    pages     = {1--61},
    url       = {https://aclanthology.org/2023.iwslt-1.1},
    abstract  = {This paper reports on the shared tasks organized by the 20th IWSLT Conference. The shared tasks address 9 scientific challenges in spoken language translation: simultaneous and offline translation, automatic subtitling and dubbing, speech-to-speech translation, multilingual, dialect and low-resource speech translation, and formality control. The shared tasks attracted a total of 38 submissions by 31 teams. The growing interest towards spoken language translation is also witnessed by the constantly increasing number of shared task organizers and contributors to the overview paper, almost evenly distributed across industry and academia.},
  }

  
  @inproceedings{mcnamee-duh-2023-extensive,
    author    = {McNamee, Paul and Duh, Kevin},
    title     = {An Extensive Exploration of Back-Translation in 60 Languages},
    booktitle = {Findings of the Association for Computational Linguistics: ACL 2023},
    publisher = {Association for Computational Linguistics},
    address   = {Toronto, Canada},
    month     = jul,
    year      = {2023},
    pages     = {8166--8183},
    url       = {https://aclanthology.org/2023.findings-acl.518},
    abstract  = {Back-translation is a data augmentation technique that has been shown to improve model quality through the creation of synthetic training bitext. Early studies showed the promise of the technique and follow on studies have produced additional refinements. We have undertaken a broad investigation using back-translation to train models from 60 languages into English; the majority of these languages are considered moderate- or low-resource languages. We observed consistent gains, though compared to prior work we saw conspicuous gains in quite a number of lower-resourced languages. We analyzed differences in translations between baseline and back-translation models, and observed many indications of improved translation quality. Translation of both rare and common terms is improved, and these improvements occur despite the less natural synthetic source-language text used in training.},
  }
  
  @inproceedings{sia23incontext,
    author    = {Sia, Suzanna and Duh, Kevin},
    title     = {In-context Learning as Maintaining Coherency: A Study of On-the-fly Machine Translation Using Large Language Models},
    booktitle = {Proceedings of Machine Translation Summit XIV (Volume 1: Research Track)},
    year      = {2023},
  }

  
@inproceedings{wicks-duh-2022-effects,
  author    = {Wicks, Rachel and Duh, Kevin},
  title     = {The Effects of Language Token Prefixing for Multilingual Machine Translation},
  booktitle = {Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12th International Joint Conference on Natural Language Processing (Volume 2: Short Papers)},
  publisher = {Association for Computational Linguistics},
  address   = {Online only},
  month     = nov,
  year      = {2022},
  pages     = {148--153},
  url       = {https://aclanthology.org/2022.aacl-short.19},
}

  
  @article{yu22multimedia,
    author  = {Yu, Pengfei and Ji, Heng and Chang, Shih-fu and Duh, Kevin},
    title   = {Multimedia Curriculum Learning for Language Acquisition},
    journal = {Journal of Science and Technology on Information and Communications},
    year    = {2022},
  }
  
  
  @inproceedings{deb-etal-2022-post,
    author    = {Deb, Kiron and Zhang, Xuan and Duh, Kevin},
    title     = {Post-Hoc Interpretation of Transformer Hyperparameters with Explainable Boosting Machines},
    booktitle = {Proceedings of the Fifth BlackboxNLP Workshop on Analyzing and Interpreting Neural Networks for NLP},
    publisher = {Association for Computational Linguistics},
    address   = {Abu Dhabi, United Arab Emirates (Hybrid)},
    month     = dec,
    year      = {2022},
    pages     = {51--61},
    url       = {https://aclanthology.org/2022.blackboxnlp-1.5},
  }
  
  
@inproceedings{sia-etal-2022-offer,
  author    = {Sia, Suzanna and Jaidka, Kokil and Ahuja, Hansin and Chhaya, Niyati and Duh, Kevin},
  title     = {Offer a Different Perspective: Modeling the Belief Alignment of Arguments in Multi-party Debates},
  booktitle = {Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing},
  publisher = {Association for Computational Linguistics},
  address   = {Abu Dhabi, United Arab Emirates},
  month     = dec,
  year      = {2022},
  pages     = {11939--11950},
  url       = {https://aclanthology.org/2022.emnlp-main.818},
}
  
  
  @inproceedings{ogundepo-etal-2022-africlirmatrix,
    author    = {Ogundepo, Odunayo and Zhang, Xinyu and Sun, Shuo and Duh, Kevin and Lin, Jimmy},
    title     = {{A}fri{CLIRM}atrix: Enabling Cross-Lingual Information Retrieval for {A}frican Languages},
    booktitle = {Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing},
    publisher = {Association for Computational Linguistics},
    address   = {Abu Dhabi, United Arab Emirates},
    month     = dec,
    year      = {2022},
    pages     = {8721--8728},
    url       = {https://aclanthology.org/2022.emnlp-main.597},
  }
  
  
  @inproceedings{marchisio-etal-2022-isovec,
    author    = {Marchisio, Kelly and Verma, Neha and Duh, Kevin and Koehn, Philipp},
    title     = {{I}so{V}ec: Controlling the Relative Isomorphism of Word Embedding Spaces},
    booktitle = {Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing},
    publisher = {Association for Computational Linguistics},
    address   = {Abu Dhabi, United Arab Emirates},
    month     = dec,
    year      = {2022},
    pages     = {6019--6033},
    url       = {https://aclanthology.org/2022.emnlp-main.404},
  }
 
  
  @inproceedings{marchisio-etal-2022-bilingual,
    author    = {Marchisio, Kelly and Saad-Eldin, Ali and Duh, Kevin and Priebe, Carey and Koehn, Philipp},
    title     = {Bilingual Lexicon Induction for Low-Resource Languages using Graph Matching via Optimal Transport},
    booktitle = {Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing},
    publisher = {Association for Computational Linguistics},
    address   = {Abu Dhabi, United Arab Emirates},
    month     = dec,
    year      = {2022},
    pages     = {2545--2561},
    url       = {https://aclanthology.org/2022.emnlp-main.164},
  }
  
  
  @inproceedings{sia22incontext,
    author    = {Sia, Suzanna and Duh, Kevin},
    title     = {Prefix Embeddings for In-Context Machine Translation},
    booktitle = {Proceedings of the 15th Conference of the Association for Machine Translation in the Americas (Volume 1: Research Track)},
    year      = {2022},
  }

  
  @inproceedings{verma22adapt,
    author    = {Verma, Neha and Murray, Kenton and Duh, Kevin},
    title     = {Strategies for Adapting Multilingual Pre-training for Domain-Specific Machine Translation},
    booktitle = {Proceedings of the 15th Conference of the Association for Machine Translation in the Americas (Volume 1: Research Track)},
    year      = {2022},
  }
  
  
  @inproceedings{nair22transfer,
    author    = {Nair, Suraj and
                 Yang, Eugene and
                 Lawrie, Dawn and
                 Duh, Kevin and
                 McNamee, Paul and
                 Murray, Kenton and
                 Mayfield, James and
                 Oard, Douglas W.},
    title     = {Transfer Learning Approaches for Building Cross-Language Dense Retrieval Models},
    editor    = {Hagen, Matthias and
                 Verberne, Suzan and
                 Macdonald, Craig and
                 Seifert, Christin and
                 Balog, Krisztian and
                 N{\o}rv{\aa}g, Kjetil and
                 Setty, Vinay},
    booktitle = {Advances in Information Retrieval},
    publisher = {Springer International Publishing},
    address   = {Cham},
    year      = {2022},
    pages     = {382--396},
    isbn      = {978-3-030-99736-6},
    abstract  = {The advent of transformer-based models such as BERT has led to the rise of neural ranking models. These models have improved the effectiveness of retrieval systems well beyond that of lexical term matching models such as BM25. While monolingual retrieval tasks have benefited from large-scale training collections such as MS MARCO and advances in neural architectures, cross-language retrieval tasks have fallen behind these advancements. This paper introduces ColBERT-X, a generalization of the ColBERT multi-representation dense retrieval model that uses the XLM-RoBERTa (XLM-R) encoder to support cross-language information retrieval (CLIR). ColBERT-X can be trained in two ways. In zero-shot training, the system is trained on the English MS MARCO collection, relying on the XLM-R encoder for cross-language mappings. In translate-train, the system is trained on the MS MARCO English queries coupled with machine translations of the associated MS MARCO passages. Results on ad hoc document ranking tasks in several languages demonstrate substantial and statistically significant improvements of these trained dense retrieval models over traditional lexical CLIR baselines.},
  }
  
  
  
    
  @inproceedings{mcnamee22mttt,
    author    = {McNamee, Paul and Duh, Kevin},
    title     = {The Multilingual Microblog Translation Corpus: Improving and Evaluating Translation of User-Generated Text},
    booktitle = {Proceedings of the 13th Conference on Language Resources and Evaluation},
    year      = {2022},
  }

  
    
  @inproceedings{pereira22temporal,
    author    = {Pereira, Lis Kanashiro and
                 Duh, Kevin and
                 Cheng, Fei and
                 Asahara, Masayuki and
                 Kobayashi, Ichiro},
    title     = {Attention-Focused Adversarial Training for Robust Temporal Reasoning},
    booktitle = {Proceedings of the 13th Conference on Language Resources and Evaluation},
    year      = {2022},
  }

  
  @inproceedings{anastasopoulos-etal-2022-findings,
    author    = {Anastasopoulos, Antonios and
                 Barrault, Lo{\"\i}c and
                 Bentivogli, Luisa and
                 Zanon Boito, Marcely and
                 Bojar, Ond{\v{r}}ej and
                 Cattoni, Roldano and
                 Currey, Anna and
                 Dinu, Georgiana and
                 Duh, Kevin and
                 Elbayad, Maha and
                 Emmanuel, Clara and
                 Est{\`e}ve, Yannick and
                 Federico, Marcello and
                 Federmann, Christian and
                 Gahbiche, Souhir and
                 Gong, Hongyu and
                 Grundkiewicz, Roman and
                 Haddow, Barry and
                 Hsu, Benjamin and
                 Javorsk{\'y}, D{\'a}vid and
                 Kloudov{\'a}, V{\v{e}}ra and
                 Lakew, Surafel and
                 Ma, Xutai and
                 Mathur, Prashant and
                 McNamee, Paul and
                 Murray, Kenton and
                 N{\u{a}}dejde, Maria and
                 Nakamura, Satoshi and
                 Negri, Matteo and
                 Niehues, Jan and
                 Niu, Xing and
                 Ortega, John and
                 Pino, Juan and
                 Salesky, Elizabeth and
                 Shi, Jiatong and
                 Sperber, Matthias and
                 St{\"u}ker, Sebastian and
                 Sudoh, Katsuhito and
                 Turchi, Marco and
                 Virkar, Yogesh and
                 Waibel, Alexander and
                 Wang, Changhan and
                 Watanabe, Shinji},
    title     = {Findings of the {IWSLT} 2022 Evaluation Campaign},
    booktitle = {Proceedings of the 19th International Conference on Spoken Language Translation (IWSLT 2022)},
    publisher = {Association for Computational Linguistics},
    address   = {Dublin, Ireland (in-person and online)},
    month     = may,
    year      = {2022},
    pages     = {98--157},
    doi       = {10.18653/v1/2022.iwslt-1.10},
    url       = {https://aclanthology.org/2022.iwslt-1.10},
    abstract  = {The evaluation campaign of the 19th International Conference on Spoken Language Translation featured eight shared tasks: (i) Simultaneous speech translation, (ii) Offline speech translation, (iii) Speech to speech translation, (iv) Low-resource speech translation, (v) Multilingual speech translation, (vi) Dialect speech translation, (vii) Formality control for speech translation, (viii) Isometric speech translation. A total of 27 teams participated in at least one of the shared tasks. This paper details, for each shared task, the purpose of the task, the data that were released, the evaluation metrics that were applied, the submissions that were received and the results that were achieved.},
  }
  
  
  @inproceedings{marchisio-etal-2021-analysis-euclidean,
    author    = {Marchisio, Kelly and
                 Park, Youngser and
                 Saad-Eldin, Ali and
                 Alyakin, Anton and
                 Duh, Kevin and
                 Priebe, Carey and
                 Koehn, Philipp},
    title     = {An Analysis of {E}uclidean vs. Graph-Based Framing for Bilingual Lexicon Induction from Word Embedding Spaces},
    booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2021},
    publisher = {Association for Computational Linguistics},
    address   = {Punta Cana, Dominican Republic},
    month     = nov,
    year      = {2021},
    pages     = {738--749},
    url       = {https://aclanthology.org/2021.findings-emnlp.64},
  }
  
  
  @inproceedings{gordon-etal-2021-data,
    author    = {Gordon, Mitchell A and Duh, Kevin and Kaplan, Jared},
    title     = {Data and Parameter Scaling Laws for Neural Machine Translation},
    booktitle = {Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing},
    publisher = {Association for Computational Linguistics},
    address   = {Online and Punta Cana, Dominican Republic},
    month     = nov,
    year      = {2021},
    pages     = {5915--5922},
    url       = {https://aclanthology.org/2021.emnlp-main.478},
  }

  
@inproceedings{zhang-duh-2021-approaching,
  author    = {Zhang, Xuan and Duh, Kevin},
  title     = {Approaching Sign Language Gloss Translation as a Low-Resource Machine Translation Task},
  booktitle = {Proceedings of the 1st International Workshop on Automatic Translation for Signed and Spoken Languages (AT4SSL)},
  month     = aug,
  year      = {2021},
  address   = {Virtual},
  publisher = {Association for Machine Translation in the Americas},
  pages     = {60--70},
  url       = {https://aclanthology.org/2021.mtsummit-at4ssl.7},
  abstract  = {A cascaded Sign Language Translation system first maps sign videos to gloss annotations and then translates glosses into a spoken languages. This work focuses on the second-stage gloss translation component, which is challenging due to the scarcity of publicly available parallel data. We approach gloss translation as a low-resource machine translation task and investigate two popular methods for improving translation quality: hyperparameter search and backtranslation. We discuss the potentials and pitfalls of these methods based on experiments on the RWTH-PHOENIX-Weather 2014T dataset.},
}
  
  
@inproceedings{wu-etal-2021-sequence,
  author    = {Wu, Winston and Duh, Kevin and Yarowsky, David},
  title     = {Sequence Models for Computational Etymology of Borrowings},
  booktitle = {Findings of the Association for Computational Linguistics: ACL-IJCNLP 2021},
  month     = aug,
  year      = {2021},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  pages     = {4032--4037},
  doi       = {10.18653/v1/2021.findings-acl.353},
  url       = {https://aclanthology.org/2021.findings-acl.353},
}
  
  
@inproceedings{martindale-etal-2021-machine,
  author    = {Martindale, Marianna and Duh, Kevin and Carpuat, Marine},
  title     = {Machine Translation Believability},
  booktitle = {Proceedings of the First Workshop on Bridging Human{--}Computer Interaction and Natural Language Processing},
  month     = apr,
  year      = {2021},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  pages     = {88--95},
  url       = {https://aclanthology.org/2021.hcinlp-1.14},
  abstract  = {Successful Machine Translation (MT) deployment requires understanding not only the intrinsic qualities of MT output, such as fluency and adequacy, but also user perceptions. Users who do not understand the source language respond to MT output based on their perception of the likelihood that the meaning of the MT output matches the meaning of the source text. We refer to this as believability. Output that is not believable may be off-putting to users, but believable MT output with incorrect meaning may mislead them. In this work, we study the relationship of believability to fluency and adequacy by applying traditional MT direct assessment protocols to annotate all three features on the output of neural MT systems. Quantitative analysis of these annotations shows that believability is closely related to but distinct from fluency, and initial qualitative analysis suggests that semantic features may account for the difference.},
}
  
  
@inproceedings{zhou-etal-2021-self,
  author    = {Zhou, Lei and Ding, Liang and Duh, Kevin and Watanabe, Shinji and Sasano, Ryohei and Takeda, Koichi},
  title     = {Self-Guided Curriculum Learning for Neural Machine Translation},
  booktitle = {Proceedings of the 18th International Conference on Spoken Language Translation (IWSLT 2021)},
  month     = aug,
  year      = {2021},
  address   = {Bangkok, Thailand (online)},
  publisher = {Association for Computational Linguistics},
  pages     = {206--214},
  doi       = {10.18653/v1/2021.iwslt-1.25},
  url       = {https://aclanthology.org/2021.iwslt-1.25},
  abstract  = {In supervised learning, a well-trained model should be able to recover ground truth accurately, i.e. the predicted labels are expected to resemble the ground truth labels as much as possible. Inspired by this, we formulate a difficulty criterion based on the recovery degrees of training examples. Motivated by the intuition that after skimming through the training corpus, the neural machine translation (NMT) model {``}knows{''} how to schedule a suitable curriculum according to learning difficulty, we propose a self-guided curriculum learning strategy that encourages the NMT model to learn from easy to hard on the basis of recovery degrees. Specifically, we adopt sentence-level BLEU score as the proxy of recovery degree. Experimental results on translation benchmarks including WMT14 English-German and WMT17 Chinese-English demonstrate that our proposed method considerably improves the recovery degree, thus consistently improving the translation performance.},
}
  
  
@inproceedings{inaguma-etal-2021-espnet,
  author    = {Inaguma, Hirofumi and Yan, Brian and Dalmia, Siddharth and Guo, Pengcheng and Shi, Jiatong and Duh, Kevin and Watanabe, Shinji},
  title     = {{ESP}net-{ST} {IWSLT} 2021 Offline Speech Translation System},
  booktitle = {Proceedings of the 18th International Conference on Spoken Language Translation (IWSLT 2021)},
  month     = aug,
  year      = {2021},
  address   = {Bangkok, Thailand (online)},
  publisher = {Association for Computational Linguistics},
  pages     = {100--109},
  doi       = {10.18653/v1/2021.iwslt-1.10},
  url       = {https://aclanthology.org/2021.iwslt-1.10},
  abstract  = {This paper describes the ESPnet-ST group{'}s IWSLT 2021 submission in the offline speech translation track. This year we made various efforts on training data, architecture, and audio segmentation. On the data side, we investigated sequence-level knowledge distillation (SeqKD) for end-to-end (E2E) speech translation. Specifically, we used multi-referenced SeqKD from multiple teachers trained on different amounts of bitext. On the architecture side, we adopted the Conformer encoder and the Multi-Decoder architecture, which equips dedicated decoders for speech recognition and translation tasks in a unified encoder-decoder model and enables search in both source and target language spaces during inference. We also significantly improved audio segmentation by using the pyannote.audio toolkit and merging multiple short segments for long context modeling. Experimental evaluations showed that each of them contributed to large improvements in translation performance. Our best E2E system combined all the above techniques with model ensembling and achieved 31.4 BLEU on the 2-ref of tst2021 and 21.2 BLEU and 19.3 BLEU on the two single references of tst2021.},
}
  
  
@inproceedings{inaguma21orthros,
  author    = {Inaguma, Hirofumi and Higuchi, Yosuke and Duh, Kevin and Kawahara, Tatsuya and Watanabe, Shinji},
  title     = {ORTHROS: Non-Autoregressive End-to-End Speech Translation with Dual-Decoder},
  booktitle = {ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  year      = {2021},
  pages     = {7503--7507},
  doi       = {10.1109/ICASSP39728.2021.9415093},
}

  
@inproceedings{shi-etal-2021-leveraging,
  author    = {Shi, Jiatong and Amith, Jonathan D. and Castillo Garc{\'\i}a, Rey and Guadalupe Sierra, Esteban and Duh, Kevin and Watanabe, Shinji},
  title     = {Leveraging End-to-End {ASR} for Endangered Language Documentation: An Empirical Study on {Yol{\'o}xochitl} {Mixtec}},
  booktitle = {Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume},
  month     = apr,
  year      = {2021},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  pages     = {1134--1145},
  url       = {https://aclanthology.org/2021.eacl-main.96},
  abstract  = {{``}Transcription bottlenecks{''}, created by a shortage of effective human transcribers (i.e., transcriber shortage), are one of the main challenges to endangered language (EL) documentation. Automatic speech recognition (ASR) has been suggested as a tool to overcome such bottlenecks. Following this suggestion, we investigated the effectiveness for EL documentation of end-to-end ASR, which unlike Hidden Markov Model ASR systems, eschews linguistic resources but is instead more dependent on large-data settings. We open source a Yolox{\'o}chitl Mixtec EL corpus. First, we review our method in building an end-to-end ASR system in a way that would be reproducible by the ASR community. We then propose a novice transcription correction task and demonstrate how ASR systems and novice transcribers can work together to improve EL documentation. We believe this combinatory methodology would mitigate the transcription bottleneck and transcriber shortage that hinders EL documentation.},
}
  
  
@inproceedings{sia-duh-2021-adaptive,
  author    = {Sia, Suzanna and Duh, Kevin},
  title     = {Adaptive Mixed Component {LDA} for Low Resource Topic Modeling},
  booktitle = {Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume},
  month     = apr,
  year      = {2021},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  pages     = {2451--2469},
  url       = {https://aclanthology.org/2021.eacl-main.209},
  abstract  = {Probabilistic topic models in low data resource scenarios are faced with less reliable estimates due to sparsity of discrete word co-occurrence counts, and do not have the luxury of retraining word or topic embeddings using neural methods. In this challenging resource constrained setting, we explore mixture models which interpolate between the discrete and continuous topic-word distributions that utilise pre-trained embeddings to improve topic coherence. We introduce an automatic trade-off between the discrete and continuous representations via an adaptive mixture coefficient, which places greater weight on the discrete representation when the corpus statistics are more reliable. The adaptive mixture coefficient takes into account global corpus statistics, and the uncertainty in each topic{'}s continuous distributions. Our approach outperforms the fully discrete, fully continuous, and static mixture model on topic coherence in low resource settings. We additionally demonstrate the generalisability of our method by extending it to handle multilingual document collections.},
}
  
  
@inproceedings{marchisio-etal-2020-unsupervised,
  author    = {Marchisio, Kelly and Duh, Kevin and Koehn, Philipp},
  title     = {When Does Unsupervised Machine Translation Work?},
  booktitle = {Proceedings of the Fifth Conference on Machine Translation},
  month     = nov,
  year      = {2020},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  pages     = {571--583},
  url       = {https://www.aclweb.org/anthology/2020.wmt-1.68},
}
  
  
@inproceedings{sun-duh-2020-clirmatrix,
  author    = {Sun, Shuo and Duh, Kevin},
  title     = {{CLIRM}atrix: A massively large collection of bilingual and multilingual datasets for Cross-Lingual Information Retrieval},
  booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)},
  month     = nov,
  year      = {2020},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  pages     = {4160--4170},
  doi       = {10.18653/v1/2020.emnlp-main.340},
  url       = {https://www.aclweb.org/anthology/2020.emnlp-main.340},
}
  
  
@inproceedings{naradowsky-etal-2020-machine,
  author    = {Naradowsky, Jason and Zhang, Xuan and Duh, Kevin},
  title     = {Machine Translation System Selection from Bandit Feedback},
  booktitle = {Proceedings of the 14th Conference of the Association for Machine Translation in the Americas (Volume 1: Research Track)},
  month     = oct,
  year      = {2020},
  address   = {Virtual},
  publisher = {Association for Machine Translation in the Americas},
  pages     = {50--63},
  url       = {https://www.aclweb.org/anthology/2020.amta-research.5},
}

  
@article{zhang20benchmark,
  author  = {Zhang, Xuan and Duh, Kevin},
  title   = {Reproducible and Efficient Benchmarks for Hyperparameter Optimization of Neural Machine Translation Systems},
  journal = {Transactions of the Association for Computational Linguistics},
  volume  = {8},
  pages   = {393--408},
  year    = {2020},
  doi     = {10.1162/tacl_a_00322},
  url     = {https://doi.org/10.1162/tacl_a_00322},
}

  
@incollection{Shinozaki2020,
  author    = {Shinozaki, Takahiro and Watanabe, Shinji and Duh, Kevin},
  editor    = {Iba, Hitoshi and Noman, Nasimul},
  title     = {Automated Development of {DNN} Based Spoken Language Systems Using Evolutionary Algorithms},
  booktitle = {Deep Neural Evolution: Deep Learning with Evolutionary Computation},
  year      = {2020},
  publisher = {Springer Singapore},
  address   = {Singapore},
  pages     = {97--129},
  abstract  = {Spoken language processing is one of the research areas that has contributed significantly to the recent revival in neural network research. For example, speech recognition has been at the forefront of deep learning research, inventing various novel models. Their dramatic performance improvements compared to previous state-of-the-art implementations have resulted in spoken language systems being deployed in a wide range of applications today. However, these systems require intensive tuning of their network designs and the training setups in order to achieve maximal performance. The laborious effort by human experts is becoming a prominent obstacle in system development. In this chapter, we first explain the basic concepts and the neural network-based implementations of spoken language processing systems. Several types of neural network models will be described. We then introduce our effort to automate the tuning of the system meta-parameters using evolutionary algorithms.},
  isbn      = {978-981-15-3685-4},
  doi       = {10.1007/978-981-15-3685-4_4},
  url       = {https://doi.org/10.1007/978-981-15-3685-4_4},
}


@inproceedings{inaguma-etal-2020-espnet,
  author    = {Inaguma, Hirofumi and Kiyono, Shun and Duh, Kevin and Karita, Shigeki and Yalta, Nelson and Hayashi, Tomoki and Watanabe, Shinji},
  title     = {{ESP}net-{ST}: All-in-One Speech Translation Toolkit},
  booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics: System Demonstrations},
  month     = jul,
  year      = {2020},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  pages     = {302--311},
  doi       = {10.18653/v1/2020.acl-demos.34},
  url       = {https://aclanthology.org/2020.acl-demos.34},
  abstract  = {We present ESPnet-ST, which is designed for the quick development of speech-to-speech translation systems in a single framework. ESPnet-ST is a new project inside end-to-end speech processing toolkit, ESPnet, which integrates or newly implements automatic speech recognition, machine translation, and text-to-speech functions for speech translation. We provide all-in-one recipes including data pre-processing, feature extraction, training, and decoding pipelines for a wide range of benchmark datasets. Our reproducible results can match or even outperform the current state-of-the-art performances; these pre-trained models are downloadable. The toolkit is publicly available at https://github.com/espnet/espnet.},
}


@inproceedings{sun-etal-2020-clireval,
  author    = {Sun, Shuo and Sia, Suzanna and Duh, Kevin},
  title     = {{CLIR}eval: Evaluating Machine Translation as a Cross-Lingual Information Retrieval Task},
  booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics: System Demonstrations},
  month     = jul,
  year      = {2020},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  pages     = {134--141},
  doi       = {10.18653/v1/2020.acl-demos.18},
  url       = {https://aclanthology.org/2020.acl-demos.18},
  abstract  = {We present CLIReval, an easy-to-use toolkit for evaluating machine translation (MT) with the proxy task of cross-lingual information retrieval (CLIR). Contrary to what the project name might suggest, CLIReval does not actually require any annotated CLIR dataset. Instead, it automatically transforms translations and references used in MT evaluations into a synthetic CLIR dataset; it then sets up a standard search engine (Elasticsearch) and computes various information retrieval metrics (e.g., mean average precision) by treating the translations as documents to be retrieved. The idea is to gauge the quality of MT by its impact on the document translation approach to CLIR. As a case study, we run CLIReval on the {``}metrics shared task{''} of WMT2019; while this extrinsic metric is not intended to replace popular intrinsic metrics such as BLEU, results suggest CLIReval is competitive in many language pairs in terms of correlation to human judgments of quality. CLIReval is publicly available at https://github.com/ssun32/CLIReval.},
}


@inproceedings{gordon-duh-2020-distill,
  author    = {Gordon, Mitchell and Duh, Kevin},
  title     = {Distill, Adapt, Distill: Training Small, In-Domain Models for Neural Machine Translation},
  booktitle = {Proceedings of the Fourth Workshop on Neural Generation and Translation},
  month     = jul,
  year      = {2020},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  pages     = {110--118},
  doi       = {10.18653/v1/2020.ngt-1.12},
  url       = {https://aclanthology.org/2020.ngt-1.12},
  abstract  = {We explore best practices for training small, memory efficient machine translation models with sequence-level knowledge distillation in the domain adaptation setting. While both domain adaptation and knowledge distillation are widely-used, their interaction remains little understood. Our large-scale empirical results in machine translation (on three language pairs with three domains each) suggest distilling twice for best performance: once using general-domain data and again using in-domain data with an adapted teacher.},
}

  
  @inproceedings{gordon-etal-2020-compressing,
    author    = {Gordon, Mitchell and Duh, Kevin and Andrews, Nicholas},
    title     = {Compressing {BERT}: Studying the Effects of Weight Pruning on Transfer Learning},
    booktitle = {Proceedings of the 5th Workshop on Representation Learning for NLP},
    month     = jul,
    year      = {2020},
    address   = {Online},
    publisher = {Association for Computational Linguistics},
    pages     = {143--155},
    doi       = {10.18653/v1/2020.repl4nlp-1.18},
    url       = {https://aclanthology.org/2020.repl4nlp-1.18},
    abstract  = {Pre-trained universal feature extractors, such as BERT for natural language processing and VGG for computer vision, have become effective methods for improving deep learning models without requiring more labeled data. While effective, feature extractors like BERT may be prohibitively large for some deployment scenarios. We explore weight pruning for BERT and ask: how does compression during pre-training affect transfer learning? We find that pruning affects transfer learning in three broad regimes. Low levels of pruning (30-40{\%}) do not affect pre-training loss or transfer to downstream tasks at all. Medium levels of pruning increase the pre-training loss and prevent useful pre-training information from being transferred to downstream tasks. High levels of pruning additionally prevent models from fitting downstream datasets, leading to further degradation. Finally, we observe that fine-tuning BERT on a specific task does not improve its prunability. We conclude that BERT can be pruned once during pre-training rather than separately for each task without affecting performance.},
  }
  
  
@article{hisamoto20membership,
  author   = {Hisamoto, Sorami and Post, Matt and Duh, Kevin},
  title    = {Membership Inference Attacks on Sequence-to-Sequence Models: Is My Data In Your Machine Translation System?},
  journal  = {Transactions of the Association for Computational Linguistics},
  volume   = {8},
  pages    = {49--63},
  year     = {2020},
  doi      = {10.1162/tacl_a_00299},
  url      = {https://doi.org/10.1162/tacl_a_00299},
  abstract = {Data privacy is an important issue for {``}machine learning as a service{''} providers. We focus on the problem of membership inference attacks: Given a data sample and black-box access to a model{'}s API, determine whether the sample existed in the model{'}s training data. Our contribution is an investigation of this problem in the context of sequence-to-sequence models, which are important in applications such as machine translation and video captioning. We define the membership inference problem for sequence generation, provide an open dataset based on state-of-the-art machine translation models, and report initial results on whether these models leak private information against several kinds of membership inference attacks.},
}

  
@InProceedings{duh20benchmark,
  author    = {Duh, Kevin and McNamee, Paul and Post, Matt and Thompson, Brian},
  title     = {Benchmarking Neural and Statistical Machine Translation on Low-Resource {African} Languages},
  booktitle = {Proceedings of the Language Resources and Evaluation Conference},
  year      = {2020},
}
  
  
@inproceedings{inaguma19e2e,
  author    = {Inaguma, Hirofumi and Duh, Kevin and Kawahara, Tatsuya and Watanabe, Shinji},
  title     = {Multilingual End-to-End Speech Translation},
  booktitle = {2019 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)},
  year      = {2019},
  month     = dec,
  pages     = {570--577},
  keywords  = {Training;Task analysis;Decoding;Pipelines;Speech processing;Speech recognition;Training data;Speech translation;multilingual end-to-end speech translation;attention-based sequence-to-sequence;transfer learning},
  doi       = {10.1109/ASRU46091.2019.9003832},
}

  
  
@inproceedings{thompson-etal-2019-hablex,
  author    = {Thompson, Brian and Knowles, Rebecca and Zhang, Xuan and Khayrallah, Huda and Duh, Kevin and Koehn, Philipp},
  title     = {{HABL}ex: Human Annotated Bilingual Lexicons for Experiments in Machine Translation},
  booktitle = {Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)},
  month     = nov,
  year      = {2019},
  address   = {Hong Kong, China},
  publisher = {Association for Computational Linguistics},
  pages     = {1382--1387},
  doi       = {10.18653/v1/D19-1142},
  url       = {https://www.aclweb.org/anthology/D19-1142},
  abstract  = {Bilingual lexicons are valuable resources used by professional human translators. While these resources can be easily incorporated in statistical machine translation, it is unclear how to best do so in the neural framework. In this work, we present the HABLex dataset, designed to test methods for bilingual lexicon integration into neural machine translation. Our data consists of human generated alignments of words and phrases in machine translation test sets in three language pairs (Russian-English, Chinese-English, and Korean-English), resulting in clean bilingual lexicons which are well matched to the reference. We also present two simple baselines - constrained decoding and continued training - and an improvement to continued training to address overfitting.},
}
  
  
@inproceedings{zhang-etal-2019-broad,
  author    = {Zhang, Sheng and Ma, Xutai and Duh, Kevin and Van Durme, Benjamin},
  title     = {Broad-Coverage Semantic Parsing as Transduction},
  booktitle = {Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)},
  month     = nov,
  year      = {2019},
  address   = {Hong Kong, China},
  publisher = {Association for Computational Linguistics},
  pages     = {3786--3798},
  doi       = {10.18653/v1/D19-1392},
  url       = {https://www.aclweb.org/anthology/D19-1392},
  abstract  = {We unify different broad-coverage semantic parsing tasks into a transduction parsing paradigm, and propose an attention-based neural transducer that incrementally builds meaning representation via a sequence of semantic relations. By leveraging multiple attention mechanisms, the neural transducer can be effectively trained without relying on a pre-trained aligner. Experiments separately conducted on three broad-coverage semantic parsing tasks {--} AMR, SDP and UCCA {--} demonstrate that our attention-based neural transducer improves the state of the art on both AMR and UCCA, and is competitive with the state of the art on SDP.},
}
  
  
@inproceedings{post19wmt,
  author = {Post, Matt and Duh, Kevin},
  title = {{JHU} 2019 Robustness Task System Description},
  booktitle = {Proceedings of the Fourth Conference on Machine Translation (Volume 2: Shared Task Papers, Day 1)},
  month = aug,
  year = {2019},
  address = {Florence, Italy},
  publisher = {Association for Computational Linguistics},
  pages = {552--558},
  doi = {10.18653/v1/W19-5366},
  url = {https://www.aclweb.org/anthology/W19-5366},
}
  
  
@inproceedings{lippincott19madar,
  author = {Lippincott, Tom and Shapiro, Pamela and Duh, Kevin and McNamee, Paul},
  title = {{JHU} System Description for the {MADAR} {A}rabic Dialect Identification Shared Task},
  booktitle = {Proceedings of the Fourth Arabic Natural Language Processing Workshop},
  month = aug,
  year = {2019},
  address = {Florence, Italy},
  publisher = {Association for Computational Linguistics},
  pages = {264--268},
  doi = {10.18653/v1/W19-4634},
  url = {https://www.aclweb.org/anthology/W19-4634},
}

  
@InProceedings{martindale19identifying,
  author    = {Martindale, Marianna J. and Carpuat, Marine and Duh, Kevin and McNamee, Paul},
  title     = {Identifying Fluently Inadequate Output in Neural and Statistical Machine Translation},
  booktitle = {Proceedings of Machine Translation Summit XVII (Volume 1: Research Track)},
  year      = {2019},
}

    
@InProceedings{renduchintala-shapiro19character,
  author    = {Renduchintala, Adithya and Shapiro, Pamela and Duh, Kevin and Koehn, Philipp},
  title     = {Character-Aware Decoder for Translation into Morphologically Rich Languages},
  booktitle = {Proceedings of Machine Translation Summit XVII (Volume 1: Research Track)},
  year      = {2019},
}

  
@InProceedings{ding19bpe,
  author    = {Ding, Shuoyang and Renduchintala, Adithya and Duh, Kevin},
  title     = {A Call for Prudent Choice of Subword Merge Operations},
  booktitle = {Proceedings of Machine Translation Summit XVII (Volume 1: Research Track)},
  year      = {2019},
}

  
@InProceedings{yarmohammadi19clir,
  author    = {Yarmohammadi, Mahsa and Ma, Xutai and Hisamoto, Sorami and Rahman, Muhammad and Wang, Yiming and Xu, Hainan and Povey, Daniel and Koehn, Philipp and Duh, Kevin},
  title     = {Robust Document Representations for Cross-Lingual Information Retrieval in Low-Resource Settings},
  booktitle = {Proceedings of Machine Translation Summit XVII (Volume 1: Research Track)},
  year      = {2019},
}
  
  
@inproceedings{shapiro19dialectal,
  author = {Shapiro, Pamela and Duh, Kevin},
  title = {Comparing Pipelined and Integrated Approaches to Dialectal {A}rabic Neural Machine Translation},
  booktitle = {Proceedings of the Sixth Workshop on {NLP} for Similar Languages, Varieties and Dialects},
  month = jun,
  year = {2019},
  publisher = {Association for Computational Linguistics},
  pages = {214--222},
  doi = {10.18653/v1/W19-1424},
  url = {https://www.aclweb.org/anthology/W19-1424},
}

  
@inproceedings{zhang19amr,
  author = {Zhang, Sheng and Ma, Xutai and Duh, Kevin and Van Durme, Benjamin},
  title = {{AMR} Parsing as Sequence-to-Graph Transduction},
  booktitle = {Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics},
  month = jul,
  year = {2019},
  address = {Florence, Italy},
  publisher = {Association for Computational Linguistics},
  pages = {80--94},
  doi = {10.18653/v1/P19-1009},
  url = {https://www.aclweb.org/anthology/P19-1009},
}
  
  
@inproceedings{zhang19curriculum,
  author = {Zhang, Xuan and Shapiro, Pamela and Kumar, Gaurav and McNamee, Paul and Carpuat, Marine and Duh, Kevin},
  title = {Curriculum Learning for Domain Adaptation in Neural Machine Translation},
  booktitle = {Proceedings of the 2019 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)},
  month = jun,
  year = {2019},
  address = {Minneapolis, Minnesota},
  publisher = {Association for Computational Linguistics},
  pages = {1903--1915},
  doi = {10.18653/v1/N19-1189},
  url = {https://www.aclweb.org/anthology/N19-1189},
}
  
  
@inproceedings{thompson19catastrophic,
  author = {Thompson, Brian and Gwinnup, Jeremy and Khayrallah, Huda and Duh, Kevin and Koehn, Philipp},
  title = {Overcoming Catastrophic Forgetting During Domain Adaptation of Neural Machine Translation},
  booktitle = {Proceedings of the 2019 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)},
  month = jun,
  year = {2019},
  address = {Minneapolis, Minnesota},
  publisher = {Association for Computational Linguistics},
  pages = {2062--2068},
  doi = {10.18653/v1/N19-1209},
  url = {https://www.aclweb.org/anthology/N19-1209},
}
  

@Article{moriya19evolution,
  author  = {Moriya, T. and Tanaka, T. and Shinozaki, Takahiro and Watanabe, Shinji and Duh, Kevin},
  title   = {Evolution-Strategy-Based Automation of System Development for High-Performance Speech Recognition},
  journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
  year    = {2019},
  volume  = {27},
  number  = {1},
  month   = jan,
  pages   = {77--88},
  issn    = {2329-9290},
  doi     = {10.1109/TASLP.2018.2871755},
}


@InProceedings{khayrallah19interactive,
  author    = {Khayrallah, Huda and Knowles, Rebecca and Duh, Kevin and Post, Matt},
  title     = {An Interactive Teaching Tool for Introducing Novices to Machine Translation},
  booktitle = {Proceedings of the 50th ACM Technical Symposium on Computer Science Education},
  year      = {2019},
  series    = {SIGCSE '19},
  publisher = {ACM},
  location  = {Minneapolis, MN, USA},
  isbn      = {978-1-4503-5890-3},
  pages     = {1276},
  doi       = {10.1145/3287324.3293840},
  url       = {http://doi.acm.org/10.1145/3287324.3293840},
  acmid     = {3293840},
  address   = {New York, NY, USA},
  keywords  = {active learning, machine translation},
  numpages  = {1},
}
   

@inproceedings{zhang18semantic,
  author    = {Zhang, Sheng and Ma, Xutai and Rudinger, Rachel and Duh, Kevin and Van Durme, Benjamin},
  title     = {Cross-lingual Decompositional Semantic Parsing},
  booktitle = {Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing},
  year      = {2018},
  publisher = {Association for Computational Linguistics},
  pages     = {1664--1675},
  location  = {Brussels, Belgium},
  url       = {http://aclweb.org/anthology/D18-1194},
}


@InProceedings{koehn18wmt,
  author    = {Koehn, Philipp and Duh, Kevin and Thompson, Brian},
  title     = {The {JHU} Machine Translation Systems for {WMT} 2018},
  booktitle = {Proceedings of the Third Conference on Machine Translation, Volume 2: Shared Task Papers},
  year      = {2018},
  publisher = {Association for Computational Linguistics},
  month     = oct,
  pages     = {442--448},
  url       = {http://www.aclweb.org/anthology/W18-6417},
  address   = {Brussels, Belgium},
}


@InProceedings{thompson18freezing,
  author    = {Thompson, Brian and Khayrallah, Huda and Anastasopoulos, Antonios
               and McCarthy, Arya D. and Duh, Kevin and Marvin, Rebecca
               and McNamee, Paul and Gwinnup, Jeremy and Anderson, Tim
               and Koehn, Philipp},
  title     = {Freezing Subnetworks to Analyze Domain Adaptation in Neural Machine Translation},
  booktitle = {Proceedings of the Third Conference on Machine Translation: Research Papers},
  year      = {2018},
  publisher = {Association for Computational Linguistics},
  pages     = {124--132},
  location  = {Brussels, Belgium},
  url       = {http://aclweb.org/anthology/W18-6313},
}



@inproceedings{liu18san,
  author    = {Liu, Xiaodong and Shen, Yelong and Duh, Kevin and Gao, Jianfeng},
  title     = {Stochastic Answer Networks for Machine Reading Comprehension},
  booktitle = {Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  year      = {2018},
  pages     = {1694--1704},
  publisher = {Association for Computational Linguistics},
  location  = {Melbourne, Australia},
  url       = {http://aclweb.org/anthology/P18-1157},
}


@inproceedings{khayrallah18regularized,
  author    = {Khayrallah, Huda and Thompson, Brian and Duh, Kevin and Koehn, Philipp},
  title     = {Regularized Training Objective for Continued Training for Domain Adaptation in Neural Machine Translation},
  booktitle = {Proceedings of the 2nd Workshop on Neural Machine Translation and Generation},
  year      = {2018},
  pages     = {36--44},
  publisher = {Association for Computational Linguistics},
  location  = {Melbourne, Australia},
  url       = {http://aclweb.org/anthology/W18-2705},
}


@inproceedings{mei18halo,
  author    = {Mei, Hongyuan and Zhang, Sheng and Duh, Kevin and Van Durme, Benjamin},
  title     = {Halo: Learning Semantics-Aware Representations for Cross-Lingual Information Extraction},
  booktitle = {Proceedings of the Seventh Joint Conference on Lexical and Computational Semantics},
  year      = {2018},
  pages     = {142--147},
  publisher = {Association for Computational Linguistics},
  location  = {New Orleans, Louisiana},
  url       = {http://aclweb.org/anthology/S18-2017},
}


@inproceedings{zhang18entity,
  author    = {Zhang, Sheng and Duh, Kevin and Van Durme, Benjamin},
  title     = {Fine-grained Entity Typing through Increased Discourse Context and Adaptive Classification Thresholds},
  booktitle = {Proceedings of the Seventh Joint Conference on Lexical and Computational Semantics},
  year      = {2018},
  pages     = {173--179},
  publisher = {Association for Computational Linguistics},
  location  = {New Orleans, Louisiana},
  url       = {http://aclweb.org/anthology/S18-2022},
}


@inproceedings{sasaki18letor,
  author    = {Sasaki, Shota and Sun, Shuo and Schamoni, Shigehiko and Duh, Kevin and Inui, Kentaro},
  title     = {Cross-Lingual Learning-to-Rank with Shared Representations},
  booktitle = {Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 2 (Short Papers)},
  year      = {2018},
  pages     = {458--463},
  publisher = {Association for Computational Linguistics},
  location  = {New Orleans, Louisiana},
  url       = {http://aclweb.org/anthology/N18-2073},
}


% NOTE(review): the citation key is spelled "shaprio" while the first author
% is "Shapiro"; the key is kept as-is because renaming it would break any
% document that already cites it.
@inproceedings{shaprio18arabic,
  author    = {Shapiro, Pamela and Duh, Kevin},
  title     = {Morphological Word Embeddings for Arabic Neural Machine Translation in Low-Resource Settings},
  booktitle = {Proceedings of the Second Workshop on Subword/Character LEvel Models},
  year      = {2018},
  pages     = {1--11},
  publisher = {Association for Computational Linguistics},
  location  = {New Orleans},
  url       = {http://aclweb.org/anthology/W18-1201},
}


@inproceedings{sell18audiovisual,
  author    = {Gregory Sell and Kevin Duh and David Snyder and Dave Etter and Daniel Garcia-Romero},
  title     = {Audio-Visual Person Recognition in Multimedia Data from the {IARPA} {JANUS} Program},
  booktitle = {Proceedings of the IEEE International Conference on Acoustics, Speech and Signal Processing},
  year      = {2018},
}


@inproceedings{inaguma18iwslt,
  author    = {Hirofumi Inaguma and Xuan Zhang and Zhiqi Wang
               and Adithya Renduchintala and Shinji Watanabe and Kevin Duh},
  booktitle = {Proceedings of the International Workshop on Spoken Language Translation},
  title     = {The JHU/Kyoto Machine Translation System for IWSLT 2018},
  year      = {2018},
}


@inproceedings{zhang17selective,
  author    = {Zhang, Sheng and Duh, Kevin and Van Durme, Benjamin},
  title     = {Selective Decoding for Cross-lingual Open Information Extraction},
  booktitle = {Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 1: Long Papers)},
  month     = {November},
  year      = {2017},
  pages     = {832--842},
  publisher = {Asian Federation of Natural Language Processing},
  url       = {http://www.aclweb.org/anthology/I17-1084},
}
  

@inproceedings{shen17reasoning,
  author    = {Shen, Yelong and Liu, Xiaodong and Duh, Kevin and Gao, Jianfeng},
  title     = {An Empirical Analysis of Multiple-Turn Reasoning Strategies in Reading Comprehension Tasks},
  booktitle = {Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 1: Long Papers)},
  month     = {November},
  year      = {2017},
  pages     = {957--966},
  publisher = {Asian Federation of Natural Language Processing},
  url       = {http://www.aclweb.org/anthology/I17-1096},
}
  

@inproceedings{white17inference,
  author    = {White, Aaron Steven and Rastogi, Pushpendre and Duh, Kevin and Van Durme, Benjamin},
  title     = {Inference is Everything: Recasting Semantic Resources into a Unified Evaluation Framework},
  booktitle = {Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 1: Long Papers)},
  month     = {November},
  year      = {2017},
  pages     = {996--1005},
  publisher = {Asian Federation of Natural Language Processing},
  url       = {http://www.aclweb.org/anthology/I17-1100},
}


@inproceedings{khayrallah17adapt,
  author    = {Khayrallah, Huda and Kumar, Gaurav and Duh, Kevin and Post, Matt and Koehn, Philipp},
  title     = {Neural Lattice Search for Domain Adaptation in Machine Translation},
  booktitle = {Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 2: Short Papers)},
  month     = {November},
  year      = {2017},
  pages     = {20--25},
  publisher = {Asian Federation of Natural Language Processing},
  url       = {http://www.aclweb.org/anthology/I17-2004},
}

  
@inproceedings{cotterell17ner,
  author    = {Cotterell, Ryan and Duh, Kevin},
  title     = {Low-Resource Named Entity Recognition with Cross-lingual, Character-Level Neural Conditional Random Fields},
  booktitle = {Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 2: Short Papers)},
  month     = {November},
  year      = {2017},
  pages     = {91--96},
  publisher = {Asian Federation of Natural Language Processing},
  url       = {http://www.aclweb.org/anthology/I17-2016},
}


@inproceedings{wang17multitask,
  author    = {Wang, Dingquan and Peng, Nanyun and Duh, Kevin},
  title     = {A Multi-task Learning Approach to Adapting Bilingual Word Embeddings for Cross-lingual Named Entity Recognition},
  booktitle = {Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 2: Short Papers)},
  month     = {November},
  year      = {2017},
  pages     = {383--388},
  publisher = {Asian Federation of Natural Language Processing},
  url       = {http://www.aclweb.org/anthology/I17-2065},
}
  

@inproceedings{zhang17mtie,
  author    = {Sheng Zhang and Kevin Duh and Ben {Van Durme}},
  title     = {{MT/IE}: Cross-lingual Open Information Extraction with Neural Sequence-to-Sequence Models},
  booktitle = {Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics},
  year      = {2017},
  publisher = {Association for Computational Linguistics},
  address   = {Gothenburg, Sweden},
}



% NOTE(review): the scrambled spelling in the title looks deliberate (the
% citation key repeats "robsut"); confirm before "correcting" it.
@inproceedings{sakaguchi17robsut,
  author    = {Keisuke Sakaguchi and Kevin Duh and Matt Post and Ben {Van Durme}},
  title     = {Robsut Wrod Reocginiton via Semi-Character Recurrent Neural Network},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence (AAAI 2017)},
  year      = {2017},
}


% arXiv preprint: the identifier is stored in eprint/archiveprefix (taken from
% the url) so eprint-aware styles can print it; journal is kept because
% classic article styles require that field.
@Article{neubig17dynet,
  Title                    = {DyNet: The Dynamic Neural Network Toolkit},
  Author                   = {Graham Neubig and Chris Dyer and Yoav Goldberg and Austin Matthews and Waleed Ammar and Antonios Anastasopoulos and Miguel Ballesteros and David Chiang and Daniel Clothiaux and Trevor Cohn and Kevin Duh and Manaal Faruqui and Cynthia Gan and Dan Garrette and Yangfeng Ji and Lingpeng Kong and Adhiguna Kuncoro and Gaurav Kumar and Chaitanya Malaviya and Paul Michel and Yusuke Oda and Matthew Richardson and Naomi Saphra and Swabha Swayamdipta and Pengcheng Yin},
  Journal                  = {ArXiv},
  Year                     = {2017},
  Month                    = {January},
  Eprint                   = {1701.03980},
  Archiveprefix            = {arXiv},
  Url                      = {https://arxiv.org/abs/1701.03980}
}



@article{yung17discourse,
  author  = {Frances Yung and Kevin Duh and Taku Komura and Yuji Matsumoto},
  title   = {A Psycholinguistic Model for the Marking of Discourse Relations},
  journal = {Dialogue and Discourse},
  volume  = {8},
  number  = {1},
  pages   = {106--131},
  month   = {Jan},
  year    = {2017},
}



@inproceedings{ding17wmt,
  author    = {Ding, Shuoyang and Khayrallah, Huda and Koehn, Philipp
               and Post, Matt and Kumar, Gaurav and Duh, Kevin},
  title     = {The JHU Machine Translation Systems for WMT 2017},
  booktitle = {Proceedings of the Second Conference on Machine Translation},
  address   = {Copenhagen, Denmark},
  month     = {September},
  year      = {2017},
  pages     = {276--282},
  publisher = {Association for Computational Linguistics},
  url       = {http://www.aclweb.org/anthology/W17-4724},
}


@inproceedings{vandurme17cadet,
  author    = {Van Durme, Benjamin and Lippincott, Tom and Duh, Kevin and
               Burchfield, Deana and Poliak, Adam and Costello, Cash and
               Finin, Tim and Miller, Scott and Mayfield, James and
               Koehn, Philipp and Harman, Craig and Lawrie, Dawn and
               May, Chandler and Thomas, Max and Carrell, Annabelle and
               Chaloux, Julianne and Chen, Tongfei and Comerford, Alex and
               Dredze, Mark and Glass, Benjamin and Hao, Shudong and
               Martin, Patrick and Rastogi, Pushpendre and
               Sankepally, Rashmi and Wolfe, Travis and
               Tran, Ying-Ying and Zhang, Ted},
  title     = {CADET: Computer Assisted Discovery Extraction and Translation},
  booktitle = {Proceedings of the IJCNLP 2017, System Demonstrations},
  month     = {November},
  year      = {2017},
  pages     = {5--8},
  publisher = {Association for Computational Linguistics},
  url       = {http://www.aclweb.org/anthology/I17-3002},
}


@inproceedings{tanaka16evolution,
  author    = {Tomohiro Tanaka and Takafumi Moriya and Takahiro Shinozaki
               and Shinji Watanabe and Takaaki Hori and Kevin Duh},
  title     = {Automated structure discovery and parameter tuning of neural network language model based on evolution strategy},
  booktitle = {Proceedings of the 2016 IEEE Workshop on Spoken Language Technology},
  year      = {2016},
}
  
  

@inproceedings{yung16rational,
  author    = {Yung, Frances and Duh, Kevin and Komura, Taku and Matsumoto, Yuji},
  title     = {Modelling the Usage of Discourse Connectives as Rational Speech Acts},
  booktitle = {Proceedings of The 20th SIGNLL Conference on Computational Natural Language Learning},
  address   = {Berlin, Germany},
  month     = {August},
  year      = {2016},
  pages     = {302--313},
  publisher = {Association for Computational Linguistics},
  url       = {http://www.aclweb.org/anthology/K16-1030},
}
  
  

@inproceedings{ding16wmt,
  author    = {Ding, Shuoyang and Duh, Kevin and Khayrallah, Huda and Koehn, Philipp and Post, Matt},
  title     = {The JHU Machine Translation Systems for WMT 2016},
  booktitle = {Proceedings of the First Conference on Machine Translation},
  address   = {Berlin, Germany},
  month     = {August},
  year      = {2016},
  pages     = {272--280},
  publisher = {Association for Computational Linguistics},
  url       = {http://www.aclweb.org/anthology/W16-2310},
  timestamp = {2016.12.01},
}



@Article{ouchi16supertag,
Title                    = {Transition-Based Dependency Parsing Exploiting Supertags},
Author                   = {H. Ouchi and K. Duh and H. Shindo and Y. Matsumoto},
Journal                  = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
Year                     = {2016},
Month                    = {Nov},
Number                   = {11},
Pages                    = {2059--2068},
Volume                   = {24},
Doi                      = {10.1109/TASLP.2016.2598310},
ISSN                     = {2329-9290},
Keywords                 = {grammars;natural language processing;English parsing;Penn Treebank;Universal Dependencies data sets;multilingual parsing;supertag sets;transition-based dependency parsing;Cats;Grammar;IEEE transactions;Magnetic heads;Speech;Speech processing;Syntactics;Dependency parsing;multilingual dependency parsing;supertags;transition-based dependency parsing},
}



@inproceedings{yung16connectives,
  author    = {Yung, Frances and Duh, Kevin and Komura, Taku and Matsumoto, Yuji},
  title     = {Modelling the Interpretation of Discourse Connectives by Bayesian Pragmatics},
  booktitle = {Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
  address   = {Berlin, Germany},
  month     = {August},
  year      = {2016},
  pages     = {531--536},
  publisher = {Association for Computational Linguistics},
  url       = {http://anthology.aclweb.org/P16-2086},
}

  

@InProceedings{tsubaki16nonlinear,
  Title                    = {Non-Linear Similarity Learning for Compositionality},
  Author                   = {Masashi Tsubaki and Kevin Duh and Masashi Shimbo and Yuji Matsumoto},
  Booktitle                = {AAAI Conference on Artificial Intelligence},
  Year                     = {2016},
  Abstract                 = {Many NLP applications rely on the existence of similarity measures over text data. Although word vector space models provide good similarity measures between words, phrasal and sentential similarities derived from composition of individual words remain as a difficult problem. In this paper, we propose a new method of non-linear similarity learning for semantic compositionality. In this method, word representations are learned through the similarity learning of sentences in a high-dimensional space with kernel functions. On the task of predicting the semantic similarity of two sentences (SemEval 2014, Task 1), our method outperforms linear baselines, feature engineering approaches, recursive neural networks, and achieve competitive results with long short-term memory models.},
  Url                      = {http://www.aaai.org/ocs/index.php/AAAI/AAAI16/paper/view/12373}
}
  

@inproceedings{wu16hws,
  author    = {Xiaoyi Wu and Kevin Duh and Yuji Matsumoto},
  title     = {A Generalized Framework for Hierarchical Word Sequence Language Model},
  booktitle = {Proc. of the Pacific Asia Conference on Language, Information and Computation},
  year      = {2016},
}
 

  

@article{DBLP:journals/corr/FriedD14a,
  author    = {Daniel Fried and Kevin Duh},
  title     = {Incorporating Both Distributional and Relational Semantics in Word Representations},
  journal   = {CoRR},
  volume    = {abs/1412.5836},
  year      = {2014},
  url       = {http://arxiv.org/abs/1412.5836},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  biburl    = {http://dblp.uni-trier.de/rec/bib/journals/corr/FriedD14a},
}


@inproceedings{liu15multitask,
  author    = {Xiaodong Liu and Jianfeng Gao and Xiaodong He and Li Deng and Kevin Duh and Ye-Yi Wang},
  title     = {Representation Learning Using Multi-Task Deep Neural Networks for Semantic Classification and Information Retrieval},
  booktitle = {Proceedings of the 2015 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
  year      = {2015},
}


@inproceedings{neubig15multitarget,
  author    = {Graham Neubig and Philip Arthur and Kevin Duh},
  title     = {Multi-Target Machine Translation with Multi-Synchronous Context-free Grammar},
  booktitle = {Proceedings of the 2015 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
  year      = {2015},
}


@inproceedings{yung15annotation,
  author    = {Yung, Frances and Duh, Kevin and Matsumoto, Yuji},
  title     = {Sequential Annotation and Chunking of Chinese Discourse Structure},
  booktitle = {Proceedings of the Eighth SIGHAN Workshop on Chinese Language Processing},
  address   = {Beijing, China},
  month     = {July},
  year      = {2015},
  pages     = {1--6},
  publisher = {Association for Computational Linguistics},
  url       = {http://www.aclweb.org/anthology/W15-3101},
}


@inproceedings{ouchi15pas,
  author    = {Ouchi, Hiroki and Shindo, Hiroyuki and Duh, Kevin and Matsumoto, Yuji},
  title     = {Joint Case Argument Identification for Japanese Predicate Argument Structure Analysis},
  booktitle = {Proceedings of the 53rd Annual Meeting of the Association for Computational Linguistics and the 7th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)},
  address   = {Beijing, China},
  month     = {July},
  year      = {2015},
  pages     = {961--970},
  publisher = {Association for Computational Linguistics},
  url       = {http://www.aclweb.org/anthology/P15-1093},
}


% NOTE(review): booktitle was missing (required for inproceedings). It is
% inferred from the anthology id in the url (P15-2xxx) and from the address and
% month shared with the ACL-IJCNLP 2015 long-paper entry in this file --
% confirm the volume wording against the proceedings.
@InProceedings{cheng15synthetic,
  Title                    = {Synthetic Word Parsing Improves Chinese Word Segmentation},
  Author                   = {Cheng, Fei and Duh, Kevin and Matsumoto, Yuji},
  Booktitle                = {Proceedings of the 53rd Annual Meeting of the Association for Computational Linguistics and the 7th International Joint Conference on Natural Language Processing (Volume 2: Short Papers)},
  Year                     = {2015},
  Address                  = {Beijing, China},
  Month                    = {July},
  Pages                    = {262--267},
  Publisher                = {Association for Computational Linguistics},
  Url                      = {http://www.aclweb.org/anthology/P15-2043}
}


@article{liu15lexicon,
  author     = {Liu, Xiaodong and Duh, Kevin and Matsumoto, Yuji},
  title      = {Multilingual Topic Models for Bilingual Dictionary Extraction},
  journal    = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  volume     = {14},
  number     = {3},
  month      = jun,
  year       = {2015},
  pages      = {11:1--11:22},
  articleno  = {11},
  numpages   = {22},
  issue_date = {June 2015},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  issn       = {2375-4699},
  doi        = {10.1145/2699939},
  url        = {http://doi.acm.org/10.1145/2699939},
  acmid      = {2699939},
  keywords   = {Bilingual dictionary, comparable corpus, multilingual topic model},
}


@inproceedings{moriya15automation,
  author    = {Takafumi Moriya and Tomohiro Tanaka and Takahiro Shinozaki
               and Shinji Watanabe and Kevin Duh},
  title     = {Automation of System Building for State-of-the-art Large Vocabulary Speech Recognition using Evolution Strategy},
  booktitle = {Proceedings of the IEEE 2015 Automatic Speech Recognition and Understanding Workshop (ASRU)},
  year      = {2015},
}



@inproceedings{neubig14tree,
  author    = {Graham Neubig and Kevin Duh},
  title     = {On the Elements of an Accurate Tree-to-String Machine Translation System},
  booktitle = {The 52nd Annual Meeting of the Association for Computational Linguistics (ACL) Short Paper Track},
  address   = {Baltimore, USA},
  month     = {June},
  year      = {2014},
}


@inproceedings{neubig14ted,
  author    = {Graham Neubig and Katsuhito Sudoh and Yusuke Oda and Kevin Duh
               and Hajime Tsukada and Masaaki Nagata},
  title     = {The {NAIST}-{NTT} {TED} Talk Treebank},
  booktitle = {International Workshop on Spoken Language Translation (IWSLT)},
  address   = {Lake Tahoe, USA},
  month     = {December},
  year      = {2014},
}


@inproceedings{jatowt14change,
  author    = {Adam Jatowt and Kevin Duh},
  title     = {A Framework for Analyzing Semantic Change of Words across Time},
  booktitle = {Proceedings of the Joint JCDL/TPDL Digital Libraries Conference},
  year      = {2014},
}


@inproceedings{liu14character,
  author    = {Xiaodong Liu and Kevin Duh and Yuji Matsumoto and Tomoya Iwakura},
  booktitle = {NIPS 2014 Workshop on Modern Machine Learning and Natural Language Processing},
  title     = {Learning Character Representations for Chinese Word Segmentation},
  year      = {2014},
}



@ARTICLE{kimura14curation,
  author = {Akisato Kimura and Kevin Duh and Tsutomu Hirao and Katsuhiko Ishiguro and Tomoharu Iwata and Ching-Man Au Yeung},
  title = {Creating Stories from Socially Curated Microblog Messages},
  journal = {IEICE TRANSACTIONS on Information and Systems},
  year = {2014},
  number = {6},
  month = {June},
  pages = {1557--1566},
  volume = {E97-D},
}


@inproceedings{ouchi14supertag,
  author    = {Ouchi, Hiroki and Duh, Kevin and Matsumoto, Yuji},
  title     = {Improving Dependency Parsers with Supertags},
  booktitle = {Proceedings of the 14th Conference of the European Chapter of the Association for Computational Linguistics, volume 2: Short Papers},
  address   = {Gothenburg, Sweden},
  month     = {April},
  year      = {2014},
  pages     = {154--158},
  publisher = {Association for Computational Linguistics},
  url       = {http://www.aclweb.org/anthology/E14-4030},
}


@inproceedings{yung14unalignable,
  author    = {Yung, Frances and Duh, Kevin and Matsumoto, Yuji},
  title     = {Analysis and Prediction of Unalignable Words in Parallel Text},
  booktitle = {Proceedings of the 14th Conference of the European Chapter of the Association for Computational Linguistics, volume 2: Short Papers},
  address   = {Gothenburg, Sweden},
  month     = {April},
  year      = {2014},
  pages     = {190--194},
  publisher = {Association for Computational Linguistics},
  url       = {http://www.aclweb.org/anthology/E14-4037},
}


% NOTE(review): "date" holds a day range (26-31), not an ISO date; biblatex
% would try to parse it as a date -- confirm which toolchain consumes this file.
@inproceedings{cheng14chinese,
  author    = {Fei Cheng and Kevin Duh and Yuji Matsumoto},
  title     = {Parsing Chinese Synthetic Words with a Character-based Dependency Model},
  booktitle = {Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC'14)},
  address   = {Reykjavik, Iceland},
  month     = {May},
  date      = {26-31},
  year      = {2014},
  publisher = {European Language Resources Association (ELRA)},
  isbn      = {978-2-9517408-8-4},
}


@inproceedings{pereira14collocation,
  author    = {Pereira, Lis and Strafella, Elga and Duh, Kevin and Matsumoto, Yuji},
  title     = {Identifying collocations using cross-lingual association measures},
  booktitle = {Proceedings of the 10th Workshop on Multiword Expressions (MWE)},
  address   = {Gothenburg, Sweden},
  month     = {April},
  year      = {2014},
  pages     = {109--113},
  publisher = {Association for Computational Linguistics},
  url       = {http://www.aclweb.org/anthology/W14-0819},
}


@inproceedings{sudoh14iwslt,
  author    = {Katsuhito Sudoh and Graham Neubig and Kevin Duh and Katsuhiko Hayashi},
  title     = {{NTT}-{NAIST} Syntax-based {SMT} Systems for {IWSLT} 2014},
  booktitle = {International Workshop on Spoken Language Translation (IWSLT)},
  address   = {Lake Tahoe, USA},
  month     = {December},
  year      = {2014},
}




@INPROCEEDINGS{duh13neural,
  author = {Duh, Kevin and Neubig, Graham and Sudoh, Katsuhito and Tsukada, Hajime},
  title = {Adaptation Data Selection using Neural Language Models: Experiments
	in Machine Translation},
  booktitle = {Proceedings of the 51st Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
  year = {2013},
  address = {Sofia, Bulgaria},
  month = {August},
  publisher = {Association for Computational Linguistics},
}



@ARTICLE{duh13disparity,
  author = {Duh, Kevin and Yeung, Ching-Man Au and Iwata, Tomoharu and Nagata, Masaaki},
  title = {Managing information disparity in multilingual document collections},
  journal = {ACM Trans. Speech Lang. Process.},
  year = {2013},
  number = {1},
  month = mar,
  pages = {1:1--1:28},
  volume = {10},
  acmid = {2442077},
  address = {New York, NY, USA},
  articleno = {1},
  doi = {10.1145/2442076.2442077},
  issue_date = {March 2013},
}


@inproceedings{tsubaki13cocomp,
  author    = {Tsubaki, Masashi and Duh, Kevin and Shimbo, Masashi and Matsumoto, Yuji},
  title     = {Modeling and Learning Semantic Co-Compositionality through Prototype Projections and Neural Networks},
  booktitle = {Proceedings of the 2013 Conference on Empirical Methods in Natural Language Processing},
  address   = {Seattle, Washington, USA},
  month     = {October},
  year      = {2013},
  pages     = {130--140},
  publisher = {Association for Computational Linguistics},
  url       = {http://www.aclweb.org/anthology/D13-1014},
}


@inproceedings{luo13srl,
  author    = {Luo, Yanyan and Duh, Kevin and Matsumoto, Yuji},
  title     = {What Information is Helpful for Dependency Based Semantic Role Labeling},
  booktitle = {Proceedings of the Sixth International Joint Conference on Natural Language Processing},
  address   = {Nagoya, Japan},
  year      = {2013},
  month     = {October},
  pages     = {781--787},
  publisher = {Asian Federation of Natural Language Processing},
  url       = {http://www.aclweb.org/anthology/I13-1094},
}


@inproceedings{liu13chinese,
  author    = {Liu, Xiaodong and Cheng, Kevin and Luo, Yanyan and Duh, Kevin and Matsumoto, Yuji},
  title     = {A Hybrid Chinese Spelling Correction Using Language Model and Statistical Machine Translation with Reranking},
  booktitle = {Proceedings of the Seventh SIGHAN Workshop on Chinese Language Processing},
  address   = {Nagoya, Japan},
  month     = {October},
  year      = {2013},
  pages     = {54--58},
  publisher = {Asian Federation of Natural Language Processing},
  url       = {http://www.aclweb.org/anthology/W13-4409},
}



@inproceedings{liu13lexicon,
  author    = {Xiaodong Liu and Kevin Duh and Yuji Matsumoto},
  title     = {Topic Models + Word Alignment = A Flexible Framework for Extracting Bilingual Dictionary from Comparable Corpus},
  booktitle = {Proceedings of the Conference on Computational Natural Language Learning},
  year      = {2013},
}


@inproceedings{kondo13align,
  author    = {Kondo, Shuhei and Duh, Kevin and Matsumoto, Yuji},
  title     = {Hidden Markov Tree Model for Word Alignment},
  booktitle = {Proceedings of the Eighth Workshop on Statistical Machine Translation},
  address   = {Sofia, Bulgaria},
  month     = {August},
  year      = {2013},
  publisher = {Association for Computational Linguistics},
}


@inproceedings{sankaran-sarkar-duh:2013:NAACL-HLT,
  author    = {Sankaran, Baskaran and Sarkar, Anoop and Duh, Kevin},
  title     = {Multi-Metric Optimization Using Ensemble Tuning},
  booktitle = {Proceedings of the 2013 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
  address   = {Atlanta, Georgia},
  year      = {2013},
  month     = {June},
  pages     = {947--957},
  publisher = {Association for Computational Linguistics},
  url       = {http://www.aclweb.org/anthology/N13-1115},
}


@inproceedings{neubig13info,
  author    = {Graham Neubig and Kevin Duh},
  booktitle = {AAAI Spring Symposium on Analyzing Microtext},
  title     = {How much is said in a tweet? A multilingual, information-theoretic perspective},
  year      = {2013},
}


@inproceedings{sudoh13iwslt,
  author    = {Katsuhito Sudoh and Graham Neubig and Kevin Duh and Hajime Tsukada},
  title     = {{NTT-NAIST} {SMT} Systems for {IWSLT2013}},
  booktitle = {Proceedings of the 10th International Workshop on Spoken Language Translation (IWSLT)},
  year      = {2013},
}


@ARTICLE{sudoh13talip,
 author = {Sudoh, Katsuhito and Wu, Xianchao and Duh, Kevin and Tsukada, Hajime and Nagata, Masaaki},
 title = {Syntax-Based Post-Ordering for Efficient Japanese-to-English Translation},
 issue_date = {August 2013},
 volume = {12},
 number = {3},
 month = aug,
 year = {2013},
 issn = {1530-0226},
 pages = {12:1--12:15},
 articleno = {12},
 numpages = {15},
 url = {http://doi.acm.org/10.1145/2499955.2499960},
 doi = {10.1145/2499955.2499960},
 acmid = {2499960},
 publisher = {ACM},
 address = {New York, NY, USA},
 journal = {ACM Transactions on Asian Language Information Processing (TALIP)},
}




@inproceedings{duh12curation,
  author    = {Kevin Duh and Tsutomu Hirao and Akisato Kimura and Katsuhiko Ishiguro
               and Tomoharu Iwata and Ching-Man Au Yeung},
  title     = {Creating Stories: Social Curation of Twitter Messages},
  booktitle = {International AAAI Conference on Weblogs and Social Media},
  year      = {2012},
  url       = {http://www.aaai.org/ocs/index.php/ICWSM/ICWSM12/paper/view/4578/5028},
}


@ARTICLE{duh12ipm,
  author = {Kevin Duh and Akinori Fujino},
  title = {Flexible sample selection strategies for transfer learning in ranking},
  journal = {Information Processing \& Management},
  year = {2012},
  number = {3},
  pages = {502--512},
  volume = {48},
  doi = {10.1016/j.ipm.2011.05.002},
  issn = {0306-4573},
  url = {http://www.sciencedirect.com/science/article/pii/S0306457311000562}
}


@inproceedings{duh12multiobj,
  author    = {Kevin Duh and Katsuhito Sudoh and Xianchao Wu and Hajime Tsukada and Masaaki Nagata},
  title     = {Learning to Translate with Multiple Objectives},
  booktitle = {Proceedings of the 50th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies},
  publisher = {Association for Computational Linguistics},
  year      = {2012},
}


@inproceedings{hayashi12sancl,
  author    = {Katsuhiko Hayashi and Shuhei Kondo and Kevin Duh and Yuji Matsumoto},
  booktitle = {Notes of the First Workshop on Syntactic Analysis of Non-Canonical Language (SANCL)},
  title     = {The NAIST Dependency Parser for SANCL2012 Shared Task},
  year      = {2012},
}


@ARTICLE{isozaki12hfe,
  author = {Isozaki, Hideki and Sudoh, Katsuhito and Tsukada, Hajime and Duh, Kevin},
  title = {HPSG-Based Preprocessing for English-to-Japanese Translation},
  journal = {ACM Transactions on Asian Language Information Processing},
  year = {2012},
  number = {3},
  month = sep,
  pages = {8:1--8:16},
  volume = {11},
  acmid = {2334802},
  address = {New York, NY, USA},
  articleno = {8},
  doi = {10.1145/2334801.2334802},
  issue_date = {September 2012},
  keywords = {English, HPSG, Japanese, Machine translation, SOV, SVO},
  publisher = {ACM},
  url = {http://doi.acm.org/10.1145/2334801.2334802}
}



@inproceedings{iwata12bidir,
  title     = {Bidirectional Semi-Supervised Learning with Graphs},
  author    = {Iwata, Tomoharu and Duh, Kevin},
  booktitle = {Proceedings of the European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases (ECML/PKDD)},
  year      = {2012},
}


@inproceedings{han12chinese,
  author    = {Han, Dan and Sudoh, Katsuhito and Wu, Xianchao and Duh, Kevin and Tsukada, Hajime and Nagata, Masaaki},
  title     = {Head Finalization Reordering for {Chinese}-to-{Japanese} Machine Translation},
  booktitle = {Proceedings of the Sixth Workshop on Syntax, Semantics and Structure in Statistical Translation},
  year      = {2012},
  month     = jul,
  pages     = {57--66},
  address   = {Jeju, Republic of Korea},
  publisher = {Association for Computational Linguistics},
  url       = {http://www.aclweb.org/anthology/W12-4207},
}



@inproceedings{wu12comparison,
  title     = {A Comparative Study of Target Dependency Structures for Statistical Machine Translation},
  author    = {Wu, Xianchao and Sudoh, Katsuhito and Duh, Kevin and Tsukada, Hajime and Nagata, Masaaki},
  booktitle = {Proceedings of the 50th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies},
  year      = {2012},
}


@inproceedings{naist12iwslt,
  author    = {Neubig, Graham and Duh, Kevin and Ogushi, Masaya and Kano, Takatomo and Kiso, Tetsuo and Sakti, Sakriani and Toda, Tomoki and Nakamura, Satoshi},
  title     = {The {NAIST} Machine Translation System for {IWSLT} 2012},
  booktitle = {International Workshop on Spoken Language Translation (IWSLT)},
  address   = {Hong Kong},
  month     = dec,
  year      = {2012},
}


@inproceedings{auyeung11assisting,
  author    = {Au Yeung, Ching-man and Duh, Kevin and Nagata, Masaaki},
  title     = {Providing Cross-lingual Editing Assistance to {Wikipedia} Users},
  booktitle = {Proceedings of the 12th International Conference on Intelligent Text Processing and Computational Linguistics},
  year      = {2011},
}


@article{basu11blrd,
  author  = {Basu, Sumit and Dunagan, John and Duh, Kevin and Muniswamy-Reddy, Kiran-Kumar},
  title   = {Bilinear Logistic Regression for Factored Diagnosis Problems},
  journal = {Operating Systems Review},
  year    = {2011},
  volume  = {45},
  number  = {3},
  pages   = {31--38},
}


@inproceedings{duh11admm,
  title     = {Distributed Learning-to-Rank on Streaming Data using Alternating Direction Method of Multipliers},
  author    = {Duh, Kevin and Suzuki, Jun and Nagata, Masaaki},
  booktitle = {NIPS'11 Big Learning Workshop},
  year      = {2011},
}


@inproceedings{duh11bayesalign,
  author    = {Duh, Kevin and Sudoh, Katsuhito and Iwata, Tomoharu and Tsukada, Hajime},
  title     = {Alignment Inference and {Bayesian} Adaptation for Machine Translation},
  booktitle = {Proceedings of the Machine Translation Summit XIII},
  year      = {2011},
}


@article{duh11csl,
  author  = {Duh, Kevin and Kirchhoff, Katrin},
  title   = {Semi-supervised ranking for document retrieval},
  journal = {Computer Speech \& Language},
  year    = {2011},
  volume  = {25},
  number  = {2},
  pages   = {261--281},
  doi     = {10.1016/j.csl.2010.05.002},
  issn    = {0885-2308},
  url     = {http://www.sciencedirect.com/science/article/pii/S0885230810000392},
}


@inproceedings{duh11gmbr,
  author    = {Duh, Kevin and Sudoh, Katsuhito and Wu, Xianchao and Tsukada, Hajime and Nagata, Masaaki},
  title     = {Generalized Minimum {Bayes} Risk System Combination},
  booktitle = {Proceedings of 5th International Joint Conference on Natural Language Processing},
  year      = {2011},
  month     = nov,
  pages     = {1356--1360},
  address   = {Chiang Mai, Thailand},
  publisher = {Asian Federation of Natural Language Processing},
  url       = {http://www.aclweb.org/anthology/I11-1153},
}


@inproceedings{duh11sentiment,
  author    = {Duh, Kevin and Fujino, Akinori and Nagata, Masaaki},
  title     = {Is Machine Translation Ripe for Cross-Lingual Sentiment Classification?},
  booktitle = {Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies},
  year      = {2011},
  month     = jun,
  pages     = {429--433},
  address   = {Portland, Oregon, USA},
  publisher = {Association for Computational Linguistics},
  url       = {http://www.aclweb.org/anthology/P11-2075},
}


@inproceedings{kondo11ntcir,
  author    = {Kondo, Shuhei and Komachi, Mamoru and Matsumoto, Yuji and Sudoh, Katsuhito and Duh, Kevin and Tsukada, Hajime},
  title     = {Learning of Linear Ordering Problems and its Application to {J-E} Patent Translation in {NTCIR-9} {PatentMT}},
  booktitle = {Proceedings of the NTCIR-9 Workshop Meeting},
  year      = {2011},
}


@inproceedings{sudoh11ntcir,
  author    = {Sudoh, Katsuhito and Duh, Kevin and Tsukada, Hajime and Nagata, Masaaki and Wu, Xianchao and Matsuzaki, Takuya and Tsujii, Jun'ichi},
  title     = {{NTT-UT} Statistical Machine Translation in {NTCIR-9} {PatentMT}},
  booktitle = {Proceedings of the NTCIR-9 Workshop Meeting},
  year      = {2011},
}


@inproceedings{sudoh11postorder,
  title     = {Post-ordering in Statistical Machine Translation},
  author    = {Sudoh, Katsuhito and Wu, Xianchao and Duh, Kevin and Tsukada, Hajime and Nagata, Masaaki},
  booktitle = {Proceedings of the Machine Translation Summit XIII},
  year      = {2011},
}


@inproceedings{suzuki11pso,
  author    = {Suzuki, Jun and Duh, Kevin and Nagata, Masaaki},
  title     = {Distributed Minimum Error Rate Training of {SMT} using Particle Swarm Optimization},
  booktitle = {Proceedings of 5th International Joint Conference on Natural Language Processing},
  year      = {2011},
  month     = nov,
  pages     = {649--657},
  address   = {Chiang Mai, Thailand},
  publisher = {Asian Federation of Natural Language Processing},
  url       = {http://www.aclweb.org/anthology/I11-1073},
}


@inproceedings{wu11predicate,
  author    = {Wu, Xianchao and Sudoh, Katsuhito and Duh, Kevin and Tsukada, Hajime and Nagata, Masaaki},
  title     = {Extracting Pre-ordering Rules from Predicate-Argument Structures},
  booktitle = {Proceedings of 5th International Joint Conference on Natural Language Processing},
  year      = {2011},
  month     = nov,
  pages     = {29--37},
  address   = {Chiang Mai, Thailand},
  publisher = {Asian Federation of Natural Language Processing},
  url       = {http://www.aclweb.org/anthology/I11-1004},
}


@inproceedings{wu11preorder,
  author    = {Wu, Xianchao and Sudoh, Katsuhito and Duh, Kevin and Tsukada, Hajime and Nagata, Masaaki},
  title     = {Extracting Pre-ordering Rules from Chunk-based Dependency Trees for {Japanese}-to-{English} Translation},
  booktitle = {Proceedings of the Machine Translation Summit XIII},
  year      = {2011},
}


@inproceedings{duh10analysis,
  title     = {Analysis of Translation Model Adaptation for Statistical Machine Translation},
  author    = {Duh, Kevin and Sudoh, Katsuhito and Tsukada, Hajime},
  booktitle = {Proceedings of the International Workshop on Spoken Language Translation (IWSLT) - Technical Papers Track},
  year      = {2010},
}


@inproceedings{duh10multitask,
  author    = {Duh, Kevin and Sudoh, Katsuhito and Tsukada, Hajime and Isozaki, Hideki and Nagata, Masaaki},
  title     = {N-Best Reranking by Multitask Learning},
  booktitle = {Proceedings of the Joint Fifth Workshop on Statistical Machine Translation and MetricsMATR},
  year      = {2010},
  month     = jul,
  pages     = {375--383},
  address   = {Uppsala, Sweden},
  publisher = {Association for Computational Linguistics},
  url       = {http://www.aclweb.org/anthology/W10-1757},
}


@inproceedings{fujita10semeval,
  author    = {Fujita, Sanae and Duh, Kevin and Fujino, Akinori and Taira, Hirotoshi and Shindo, Hiroyuki},
  title     = {{MSS}: Investigating the Effectiveness of Domain Combinations and Topic Features for Word Sense Disambiguation},
  booktitle = {Proceedings of the 5th International Workshop on Semantic Evaluation},
  year      = {2010},
  month     = jul,
  pages     = {383--386},
  address   = {Uppsala, Sweden},
  publisher = {Association for Computational Linguistics},
  url       = {http://www.aclweb.org/anthology/S10-1086},
}


@inproceedings{hayashi10hier,
  author    = {Hayashi, Katsuhiko and Tsukada, Hajime and Sudoh, Katsuhito and Duh, Kevin and Yamamoto, Seiichi},
  title     = {Hierarchical Phrase-based Machine Translation with Word-based Reordering Model},
  booktitle = {Proceedings of the 23rd International Conference on Computational Linguistics (Coling 2010)},
  year      = {2010},
  month     = aug,
  pages     = {439--446},
  address   = {Beijing, China},
  publisher = {Coling 2010 Organizing Committee},
  url       = {http://www.aclweb.org/anthology/C10-1050},
}


@inproceedings{isozaki10eval,
  author    = {Isozaki, Hideki and Hirao, Tsutomu and Duh, Kevin and Sudoh, Katsuhito and Tsukada, Hajime},
  title     = {Automatic Evaluation of Translation Quality for Distant Language Pairs},
  booktitle = {Proceedings of the 2010 Conference on Empirical Methods in Natural Language Processing},
  year      = {2010},
  month     = oct,
  pages     = {944--952},
  address   = {Cambridge, MA},
  publisher = {Association for Computational Linguistics},
  url       = {http://www.aclweb.org/anthology/D10-1092},
}


@inproceedings{isozaki10hfe,
  author    = {Isozaki, Hideki and Sudoh, Katsuhito and Tsukada, Hajime and Duh, Kevin},
  title     = {Head Finalization: A Simple Reordering Rule for {SOV} Languages},
  booktitle = {Proceedings of the Joint Fifth Workshop on Statistical Machine Translation and MetricsMATR},
  year      = {2010},
  month     = jul,
  pages     = {244--251},
  address   = {Uppsala, Sweden},
  publisher = {Association for Computational Linguistics},
  url       = {http://www.aclweb.org/anthology/W10-1736},
}


@inproceedings{sudoh10dividetranslate,
  author    = {Sudoh, Katsuhito and Duh, Kevin and Tsukada, Hajime and Hirao, Tsutomu and Nagata, Masaaki},
  title     = {Divide and Translate: Improving Long Distance Reordering in Statistical Machine Translation},
  booktitle = {Proceedings of the Joint Fifth Workshop on Statistical Machine Translation and MetricsMATR},
  year      = {2010},
  month     = jul,
  pages     = {418--427},
  address   = {Uppsala, Sweden},
  publisher = {Association for Computational Linguistics},
  url       = {http://www.aclweb.org/anthology/W10-1762},
}


@inproceedings{sudoh10iwslt,
  author    = {Sudoh, Katsuhito and Duh, Kevin and Tsukada, Hajime},
  title     = {{NTT} Statistical Machine Translation System for {IWSLT} 2010},
  booktitle = {Proceedings of the 7th International Workshop on Spoken Language Translation (IWSLT)},
  year      = {2010},
  editor    = {Federico, Marcello and Lane, Ian and Paul, Michael and Yvon, Fran{\c{c}}ois},
  pages     = {147--152},
  location  = {Paris, France},
}


@phdthesis{duh09phd,
  title  = {Learning to Rank with Partially-Labeled Data},
  author = {Duh, Kevin K.},
  school = {University of Washington},
  year   = {2009},
}


@inproceedings{yang09iwslt,
  author    = {Yang, Mei and Axelrod, Amittai and Duh, Kevin and Kirchhoff, Katrin},
  title     = {The {University} of {Washington} Machine Translation System for {IWSLT} 2009},
  booktitle = {Proceedings of the International Workshop on Spoken Language Translation},
  year      = {2009},
}


@inproceedings{axelrod08wmt,
  author    = {Axelrod, Amittai and Yang, Mei and Duh, Kevin and Kirchhoff, Katrin},
  title     = {The {University} of {Washington} Machine Translation System for {ACL} {WMT} 2008},
  booktitle = {Proceedings of the Third Workshop on Statistical Machine Translation},
  year      = {2008},
  month     = jun,
  pages     = {123--126},
  address   = {Columbus, Ohio},
  publisher = {Association for Computational Linguistics},
  url       = {http://www.aclweb.org/anthology/W/W08/W08-0314},
}


@inproceedings{duh08boost,
  author    = {Duh, Kevin and Kirchhoff, Katrin},
  title     = {Beyond Log-Linear Models: Boosted Minimum Error Rate Training for N-best Re-ranking},
  booktitle = {Proceedings of ACL-08: HLT, Short Papers},
  year      = {2008},
  month     = jun,
  pages     = {37--40},
  address   = {Columbus, Ohio},
  publisher = {Association for Computational Linguistics},
  url       = {http://www.aclweb.org/anthology/P/P08/P08-2010},
}


@inproceedings{duh08eval,
  author    = {Duh, Kevin},
  title     = {Ranking vs. Regression in Machine Translation Evaluation},
  booktitle = {Proceedings of the Third Workshop on Statistical Machine Translation},
  year      = {2008},
  month     = jun,
  pages     = {191--194},
  address   = {Columbus, Ohio},
  publisher = {Association for Computational Linguistics},
  url       = {http://www.aclweb.org/anthology/W/W08/W08-0331},
}


@inproceedings{duh08hcir,
  title     = {Beyond the Search Box: Helping Users find Health information on the Web},
  author    = {Duh, Kevin and Medero, Shawn and Shultz, Mike and Eng, Tom},
  booktitle = {HCIR: Workshop on Human-Computer Interaction and Information Retrieval},
  year      = {2008},
}


@inproceedings{duh08sigir,
  title     = {Learning to rank with partially-labeled data},
  author    = {Duh, Kevin and Kirchhoff, Katrin},
  booktitle = {Proceedings of the 31st annual international ACM SIGIR conference on Research and development in information retrieval},
  series    = {SIGIR '08},
  location  = {Singapore, Singapore},
  year      = {2008},
  pages     = {251--258},
  numpages  = {8},
  publisher = {ACM},
  address   = {New York, NY, USA},
  isbn      = {978-1-60558-164-4},
  acmid     = {1390379},
  doi       = {10.1145/1390334.1390379},
  url       = {http://doi.acm.org/10.1145/1390334.1390379},
}


@techreport{UWEE08-FLMtutorial,
  author      = {Kirchhoff, Katrin and Bilmes, Jeff and Duh, Kevin},
  title       = {Factored Language Models - a Tutorial},
  institution = {University of Washington, Department of Electrical Engineering},
  year        = {2008},
}


@inproceedings{corstonoliver06bpm,
  author    = {Corston-Oliver, Simon and Aue, Anthony and Duh, Kevin and Ringger, Eric},
  title     = {Multilingual Dependency Parsing using {Bayes} Point Machines},
  booktitle = {Proceedings of the Human Language Technology Conference of the NAACL, Main Conference},
  year      = {2006},
  month     = jun,
  pages     = {160--167},
  address   = {New York City, USA},
  publisher = {Association for Computational Linguistics},
  url       = {http://www.aclweb.org/anthology/N/N06/N06-1021},
}


@inproceedings{duh06lexicon,
  author    = {Duh, Kevin and Kirchhoff, Katrin},
  title     = {Lexicon Acquisition for Dialectal {Arabic} Using Transductive Learning},
  booktitle = {Proceedings of the 2006 Conference on Empirical Methods in Natural Language Processing},
  year      = {2006},
  month     = jul,
  pages     = {399--407},
  address   = {Sydney, Australia},
  publisher = {Association for Computational Linguistics},
  url       = {http://www.aclweb.org/anthology/W/W06/W06-1647},
}


@article{kirchhoff06arabic,
  author  = {Kirchhoff, Katrin and Vergyri, Dimitra and Bilmes, Jeff and Duh, Kevin and Stolcke, Andreas},
  title   = {Morphology-based language modeling for conversational {Arabic} speech recognition},
  journal = {Computer Speech \& Language},
  year    = {2006},
  volume  = {20},
  number  = {4},
}


@inproceedings{kirchhoff06iwslt,
  author    = {Kirchhoff, Katrin and Duh, Kevin and Lim, Chris},
  title     = {The {University} of {Washington} Machine Translation System for {IWSLT} 2006},
  booktitle = {Proceedings of the International Workshop on Spoken Language Translation (IWSLT)},
  year      = {2006},
}


@inproceedings{kirchhoff06tcstar,
  title     = {Statistical Machine Translation of Parliamentary Proceedings Using Morpho-Syntactic Knowledge},
  author    = {Kirchhoff, Katrin and Yang, Mei and Duh, Kevin},
  booktitle = {Proceedings of the TC-STAR Workshop on Speech to Speech Translation},
  address   = {Barcelona, Spain},
  year      = {2006},
}


@inproceedings{bartels05triangulation,
  title     = {Genetic Triangulation of Graphical Models for Speech and Language Processing},
  author    = {Bartels, Chris and Duh, Kevin and Bilmes, Jeff and Kirchhoff, Katrin and King, Simon},
  booktitle = {Proceedings of Interspeech/Eurospeech},
  year      = {2005},
}


@inproceedings{duh05factorial,
  author    = {Duh, Kevin},
  title     = {Jointly Labeling Multiple Sequences: A Factorial {HMM} Approach},
  booktitle = {Proceedings of the ACL Student Research Workshop},
  year      = {2005},
  month     = jun,
  pages     = {19--24},
  address   = {Ann Arbor, Michigan},
  publisher = {Association for Computational Linguistics},
  url       = {http://www.aclweb.org/anthology/P/P05/P05-2004},
}


@inproceedings{duh05multilabel,
  title     = {Structured Multi-label Transductive Learning: a Case Study in Lexicon Acquisition},
  author    = {Duh, Kevin and Kirchhoff, Katrin},
  booktitle = {NIPS 2005 Workshop on Advances in Structured Learning for Text and Speech Processing},
  year      = {2005},
}


@inproceedings{duh05pos,
  author    = {Duh, Kevin and Kirchhoff, Katrin},
  title     = {{POS} Tagging of Dialectal {Arabic}: A Minimally Supervised Approach},
  booktitle = {Proceedings of the ACL Workshop on Computational Approaches to Semitic Languages},
  year      = {2005},
  month     = jun,
  pages     = {55--62},
  address   = {Ann Arbor, Michigan},
  publisher = {Association for Computational Linguistics},
  url       = {http://www.aclweb.org/anthology/W/W05/W05-0708},
}


@inproceedings{duh04lm,
  title     = {Automatic Learning of Language Model Structure},
  author    = {Duh, Kevin and Kirchhoff, Katrin},
  booktitle = {Proceedings of the 20th International Conference on Computational Linguistics (COLING)},
  year      = {2004},
}


@inproceedings{vergyri04arabic,
  author    = {Vergyri, Dimitra and Kirchhoff, Katrin and Duh, Kevin and Stolcke, Andreas},
  title     = {Morphology-Based Language Modeling for {Arabic} Speech Recognition},
  booktitle = {Proceedings of International Conference on Spoken Language Processing (ICSLP/Interspeech)},
  year      = {2004},
}